diff options
Diffstat (limited to 'net')
581 files changed, 30049 insertions, 20058 deletions
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig new file mode 100644 index 000000000000..028a5c6d1f61 --- /dev/null +++ b/net/6lowpan/Kconfig @@ -0,0 +1,6 @@ +config 6LOWPAN + bool "6LoWPAN Support" + depends on IPV6 + ---help--- + This enables IPv6 over Low power Wireless Personal Area Network - + "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile new file mode 100644 index 000000000000..415886bb456a --- /dev/null +++ b/net/6lowpan/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_6LOWPAN) := 6lowpan.o + +6lowpan-y := iphc.o diff --git a/net/ieee802154/6lowpan_iphc.c b/net/6lowpan/iphc.c index 211b5686d719..142eef55c9e2 100644 --- a/net/ieee802154/6lowpan_iphc.c +++ b/net/6lowpan/iphc.c @@ -3,8 +3,7 @@ * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ -/* - * Based on patches from Jon Smirl <jonsmirl@gmail.com> +/* Based on patches from Jon Smirl <jonsmirl@gmail.com> * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -58,16 +57,15 @@ #include <net/ipv6.h> #include <net/af_ieee802154.h> -/* - * Uncompress address function for source and +/* Uncompress address function for source and * destination address(non-multicast). * * address_mode is sam value or dam value. */ static int uncompress_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, const u8 address_mode, - const u8 *lladdr, const u8 addr_type, - const u8 addr_len) + struct in6_addr *ipaddr, const u8 address_mode, + const u8 *lladdr, const u8 addr_type, + const u8 addr_len) { bool fail; @@ -140,13 +138,12 @@ static int uncompress_addr(struct sk_buff *skb, return 0; } -/* - * Uncompress address function for source context +/* Uncompress address function for source context * based address(non-multicast). */ static int uncompress_context_based_src_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 sam) + struct in6_addr *ipaddr, + const u8 sam) { switch (sam) { case LOWPAN_IPHC_ADDR_00: @@ -175,13 +172,13 @@ static int uncompress_context_based_src_addr(struct sk_buff *skb, } static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr, - struct net_device *dev, skb_delivery_cb deliver_skb) + struct net_device *dev, skb_delivery_cb deliver_skb) { struct sk_buff *new; int stat; new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), - GFP_ATOMIC); + GFP_ATOMIC); kfree_skb(skb); if (!new) @@ -196,7 +193,7 @@ static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr, new->dev = dev; raw_dump_table(__func__, "raw skb data dump before receiving", - new->data, new->len); + new->data, new->len); stat = deliver_skb(new, dev); @@ -208,10 +205,9 @@ static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr, /* Uncompress function for multicast destination address, * when M bit is set. */ -static int -lowpan_uncompress_multicast_daddr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 dam) +static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, + struct in6_addr *ipaddr, + const u8 dam) { bool fail; @@ -257,41 +253,41 @@ lowpan_uncompress_multicast_daddr(struct sk_buff *skb, } raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is", - ipaddr->s6_addr, 16); + ipaddr->s6_addr, 16); return 0; } -static int -uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) +static int uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) { bool fail; u8 tmp = 0, val = 0; - if (!uh) - goto err; - - fail = lowpan_fetch_skb(skb, &tmp, 1); + fail = lowpan_fetch_skb(skb, &tmp, sizeof(tmp)); if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) { pr_debug("UDP header uncompression\n"); switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { case LOWPAN_NHC_UDP_CS_P_00: - fail |= lowpan_fetch_skb(skb, &uh->source, 2); - fail |= lowpan_fetch_skb(skb, &uh->dest, 2); + fail |= lowpan_fetch_skb(skb, &uh->source, + sizeof(uh->source)); + fail |= lowpan_fetch_skb(skb, &uh->dest, + sizeof(uh->dest)); break; case LOWPAN_NHC_UDP_CS_P_01: - fail |= lowpan_fetch_skb(skb, &uh->source, 2); - fail |= lowpan_fetch_skb(skb, &val, 1); + fail |= lowpan_fetch_skb(skb, &uh->source, + sizeof(uh->source)); + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); uh->dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); break; case LOWPAN_NHC_UDP_CS_P_10: - fail |= lowpan_fetch_skb(skb, &val, 1); + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); uh->source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); - fail |= lowpan_fetch_skb(skb, &uh->dest, 2); + fail |= lowpan_fetch_skb(skb, &uh->dest, + sizeof(uh->dest)); break; case LOWPAN_NHC_UDP_CS_P_11: - fail |= lowpan_fetch_skb(skb, &val, 1); + fail |= lowpan_fetch_skb(skb, &val, sizeof(val)); uh->source = htons(LOWPAN_NHC_UDP_4BIT_PORT + (val >> 4)); uh->dest = htons(LOWPAN_NHC_UDP_4BIT_PORT + @@ -300,7 +296,6 @@ uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) default: pr_debug("ERROR: unknown UDP format\n"); goto err; - break; } pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", @@ -311,11 +306,11 @@ uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) pr_debug_ratelimited("checksum elided currently not supported\n"); goto err; } else { - fail |= lowpan_fetch_skb(skb, &uh->check, 2); + fail |= lowpan_fetch_skb(skb, &uh->check, + sizeof(uh->check)); } - /* - * UDP lenght needs to be infered from the lower layers + /* UDP length needs to be infered from the lower layers * here, we obtain the hint from the remaining size of the * frame */ @@ -338,21 +333,21 @@ err: static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 }; int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, - const u8 *saddr, const u8 saddr_type, const u8 saddr_len, - const u8 *daddr, const u8 daddr_type, const u8 daddr_len, - u8 iphc0, u8 iphc1, skb_delivery_cb deliver_skb) + const u8 *saddr, const u8 saddr_type, const u8 saddr_len, + const u8 *daddr, const u8 daddr_type, const u8 daddr_len, + u8 iphc0, u8 iphc1, skb_delivery_cb deliver_skb) { struct ipv6hdr hdr = {}; u8 tmp, num_context = 0; int err; raw_dump_table(__func__, "raw skb data dump uncompressed", - skb->data, skb->len); + skb->data, skb->len); /* another if the CID flag is set */ if (iphc1 & LOWPAN_IPHC_CID) { pr_debug("CID flag is set, increase header with one\n"); - if (lowpan_fetch_skb_u8(skb, &num_context)) + if (lowpan_fetch_skb(skb, &num_context, sizeof(num_context))) goto drop; } @@ -360,12 +355,11 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, /* Traffic Class and Flow Label */ switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { - /* - * Traffic Class and FLow Label carried in-line + /* Traffic Class and FLow Label carried in-line * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) */ case 0: /* 00b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) + if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) goto drop; memcpy(&hdr.flow_lbl, &skb->data[0], 3); @@ -374,23 +368,21 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | (hdr.flow_lbl[0] & 0x0f); break; - /* - * Traffic class carried in-line + /* Traffic class carried in-line * ECN + DSCP (1 byte), Flow Label is elided */ case 2: /* 10b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) + if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) goto drop; hdr.priority = ((tmp >> 2) & 0x0f); hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); break; - /* - * Flow Label carried in-line + /* Flow Label carried in-line * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided */ case 1: /* 01b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) + if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) goto drop; hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30); @@ -407,7 +399,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, /* Next Header */ if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { /* Next header is carried inline */ - if (lowpan_fetch_skb_u8(skb, &(hdr.nexthdr))) + if (lowpan_fetch_skb(skb, &hdr.nexthdr, sizeof(hdr.nexthdr))) goto drop; pr_debug("NH flag is set, next header carried inline: %02x\n", @@ -415,10 +407,11 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, } /* Hop Limit */ - if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) + if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) { hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; - else { - if (lowpan_fetch_skb_u8(skb, &(hdr.hop_limit))) + } else { + if (lowpan_fetch_skb(skb, &hdr.hop_limit, + sizeof(hdr.hop_limit))) goto drop; } @@ -428,13 +421,12 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, if (iphc1 & LOWPAN_IPHC_SAC) { /* Source address context based uncompression */ pr_debug("SAC bit is set. Handle context based source address.\n"); - err = uncompress_context_based_src_addr( - skb, &hdr.saddr, tmp); + err = uncompress_context_based_src_addr(skb, &hdr.saddr, tmp); } else { /* Source address uncompression */ pr_debug("source address stateless compression\n"); err = uncompress_addr(skb, &hdr.saddr, tmp, saddr, - saddr_type, saddr_len); + saddr_type, saddr_len); } /* Check on error of previous branch */ @@ -450,16 +442,17 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, pr_debug("dest: context-based mcast compression\n"); /* TODO: implement this */ } else { - err = lowpan_uncompress_multicast_daddr( - skb, &hdr.daddr, tmp); + err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr, + tmp); + if (err) goto drop; } } else { err = uncompress_addr(skb, &hdr.daddr, tmp, daddr, - daddr_type, daddr_len); + daddr_type, daddr_len); pr_debug("dest: stateless compression mode %d dest %pI6c\n", - tmp, &hdr.daddr); + tmp, &hdr.daddr); if (err) goto drop; } @@ -468,11 +461,11 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, if (iphc0 & LOWPAN_IPHC_NH_C) { struct udphdr uh; struct sk_buff *new; + if (uncompress_udp_header(skb, &uh)) goto drop; - /* - * replace the compressed UDP head by the uncompressed UDP + /* replace the compressed UDP head by the uncompressed UDP * header */ new = skb_copy_expand(skb, sizeof(struct udphdr), @@ -489,7 +482,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); raw_dump_table(__func__, "raw UDP header dump", - (u8 *)&uh, sizeof(uh)); + (u8 *)&uh, sizeof(uh)); hdr.nexthdr = UIP_PROTO_UDP; } @@ -504,8 +497,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, hdr.version, ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit, &hdr.daddr); - raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, - sizeof(hdr)); + raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, sizeof(hdr)); return skb_deliver(skb, &hdr, dev, deliver_skb); @@ -515,9 +507,9 @@ drop: } EXPORT_SYMBOL_GPL(lowpan_process_data); -static u8 lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, - const struct in6_addr *ipaddr, - const unsigned char *lladdr) +static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift, + const struct in6_addr *ipaddr, + const unsigned char *lladdr) { u8 val = 0; @@ -526,24 +518,22 @@ static u8 lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, pr_debug("address compression 0 bits\n"); } else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { /* compress IID to 16 bits xxxx::XXXX */ - memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2); - *hc06_ptr += 2; + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2); val = 2; /* 16-bits */ raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)", - *hc06_ptr - 2, 2); + *hc_ptr - 2, 2); } else { /* do not compress IID => xxxx::IID */ - memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); - *hc06_ptr += 8; + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8); val = 1; /* 64-bits */ raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)", - *hc06_ptr - 8, 8); + *hc_ptr - 8, 8); } return rol8(val, shift); } -static void compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) +static void compress_udp_header(u8 **hc_ptr, struct sk_buff *skb) { struct udphdr *uh = udp_hdr(skb); u8 tmp; @@ -555,75 +545,75 @@ static void compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) pr_debug("UDP header: both ports compression to 4 bits\n"); /* compression value */ tmp = LOWPAN_NHC_UDP_CS_P_11; - lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); /* source and destination port */ tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT + ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4); - lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) == LOWPAN_NHC_UDP_8BIT_PORT) { pr_debug("UDP header: remove 8 bits of dest\n"); /* compression value */ tmp = LOWPAN_NHC_UDP_CS_P_01; - lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); /* source port */ - lowpan_push_hc_data(hc06_ptr, &uh->source, sizeof(uh->source)); + lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); /* destination port */ tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT; - lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) == LOWPAN_NHC_UDP_8BIT_PORT) { pr_debug("UDP header: remove 8 bits of source\n"); /* compression value */ tmp = LOWPAN_NHC_UDP_CS_P_10; - lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); /* source port */ tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT; - lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); /* destination port */ - lowpan_push_hc_data(hc06_ptr, &uh->dest, sizeof(uh->dest)); + lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); } else { pr_debug("UDP header: can't compress\n"); /* compression value */ tmp = LOWPAN_NHC_UDP_CS_P_00; - lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + lowpan_push_hc_data(hc_ptr, &tmp, sizeof(tmp)); /* source port */ - lowpan_push_hc_data(hc06_ptr, &uh->source, sizeof(uh->source)); + lowpan_push_hc_data(hc_ptr, &uh->source, sizeof(uh->source)); /* destination port */ - lowpan_push_hc_data(hc06_ptr, &uh->dest, sizeof(uh->dest)); + lowpan_push_hc_data(hc_ptr, &uh->dest, sizeof(uh->dest)); } /* checksum is always inline */ - lowpan_push_hc_data(hc06_ptr, &uh->check, sizeof(uh->check)); + lowpan_push_hc_data(hc_ptr, &uh->check, sizeof(uh->check)); /* skip the UDP header */ skb_pull(skb, sizeof(struct udphdr)); } int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) { - u8 tmp, iphc0, iphc1, *hc06_ptr; + u8 tmp, iphc0, iphc1, *hc_ptr; struct ipv6hdr *hdr; u8 head[100] = {}; + int addr_type; if (type != ETH_P_IPV6) return -EINVAL; hdr = ipv6_hdr(skb); - hc06_ptr = head + 2; + hc_ptr = head + 2; pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" "\tnexthdr = 0x%02x\n\thop_lim = %d\n\tdest = %pI6c\n", - hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, - hdr->hop_limit, &hdr->daddr); + hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, + hdr->hop_limit, &hdr->daddr); raw_dump_table(__func__, "raw skb network header dump", - skb_network_header(skb), sizeof(struct ipv6hdr)); + skb_network_header(skb), sizeof(struct ipv6hdr)); - /* - * As we copy some bit-length fields, in the IPHC encoding bytes, + /* As we copy some bit-length fields, in the IPHC encoding bytes, * we sometimes use |= * If the field is 0, and the current bit value in memory is 1, * this does not work. We therefore reset the IPHC encoding here @@ -638,49 +628,47 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, raw_dump_inline(__func__, "daddr", (unsigned char *)_daddr, IEEE802154_ADDR_LEN); - raw_dump_table(__func__, - "sending raw skb network uncompressed packet", - skb->data, skb->len); + raw_dump_table(__func__, "sending raw skb network uncompressed packet", + skb->data, skb->len); - /* - * Traffic class, flow label + /* Traffic class, flow label * If flow label is 0, compress it. If traffic class is 0, compress it * We have to process both in the same time as the offset of traffic * class depends on the presence of version and flow label */ - /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */ + /* hc format of TC is ECN | DSCP , original one is DSCP | ECN */ tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); tmp = ((tmp & 0x03) << 6) | (tmp >> 2); if (((hdr->flow_lbl[0] & 0x0F) == 0) && - (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { + (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { /* flow label can be compressed */ iphc0 |= LOWPAN_IPHC_FL_C; if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { + ((hdr->flow_lbl[0] & 0xF0) == 0)) { /* compress (elide) all */ iphc0 |= LOWPAN_IPHC_TC_C; } else { /* compress only the flow label */ - *hc06_ptr = tmp; - hc06_ptr += 1; + *hc_ptr = tmp; + hc_ptr += 1; } } else { /* Flow label cannot be compressed */ if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { + ((hdr->flow_lbl[0] & 0xF0) == 0)) { /* compress only traffic class */ iphc0 |= LOWPAN_IPHC_TC_C; - *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); - memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); - hc06_ptr += 3; + *hc_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); + memcpy(hc_ptr + 1, &hdr->flow_lbl[1], 2); + hc_ptr += 3; } else { /* compress nothing */ - memcpy(hc06_ptr, hdr, 4); + memcpy(hc_ptr, hdr, 4); /* replace the top byte with new ECN | DSCP format */ - *hc06_ptr = tmp; - hc06_ptr += 4; + *hc_ptr = tmp; + hc_ptr += 4; } } @@ -690,13 +678,11 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, if (hdr->nexthdr == UIP_PROTO_UDP) iphc0 |= LOWPAN_IPHC_NH_C; - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { - *hc06_ptr = hdr->nexthdr; - hc06_ptr += 1; - } + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) + lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr, + sizeof(hdr->nexthdr)); - /* - * Hop limit + /* Hop limit * if 1: compress, encoding is 01 * if 64: compress, encoding is 10 * if 255: compress, encoding is 11 @@ -713,87 +699,89 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, iphc0 |= LOWPAN_IPHC_TTL_255; break; default: - *hc06_ptr = hdr->hop_limit; - hc06_ptr += 1; - break; + lowpan_push_hc_data(&hc_ptr, &hdr->hop_limit, + sizeof(hdr->hop_limit)); } + addr_type = ipv6_addr_type(&hdr->saddr); /* source address compression */ - if (is_addr_unspecified(&hdr->saddr)) { + if (addr_type == IPV6_ADDR_ANY) { pr_debug("source address is unspecified, setting SAC\n"); iphc1 |= LOWPAN_IPHC_SAC; - /* TODO: context lookup */ - } else if (is_addr_link_local(&hdr->saddr)) { - iphc1 |= lowpan_compress_addr_64(&hc06_ptr, - LOWPAN_IPHC_SAM_BIT, &hdr->saddr, _saddr); - pr_debug("source address unicast link-local %pI6c " - "iphc1 0x%02x\n", &hdr->saddr, iphc1); } else { - pr_debug("send the full source address\n"); - memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16); - hc06_ptr += 16; + if (addr_type & IPV6_ADDR_LINKLOCAL) { + iphc1 |= lowpan_compress_addr_64(&hc_ptr, + LOWPAN_IPHC_SAM_BIT, + &hdr->saddr, _saddr); + pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n", + &hdr->saddr, iphc1); + } else { + pr_debug("send the full source address\n"); + lowpan_push_hc_data(&hc_ptr, hdr->saddr.s6_addr, 16); + } } + addr_type = ipv6_addr_type(&hdr->daddr); /* destination address compression */ - if (is_addr_mcast(&hdr->daddr)) { + if (addr_type & IPV6_ADDR_MULTICAST) { pr_debug("destination address is multicast: "); iphc1 |= LOWPAN_IPHC_M; if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { pr_debug("compressed to 1 octet\n"); iphc1 |= LOWPAN_IPHC_DAM_11; /* use last byte */ - *hc06_ptr = hdr->daddr.s6_addr[15]; - hc06_ptr += 1; + lowpan_push_hc_data(&hc_ptr, + &hdr->daddr.s6_addr[15], 1); } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { pr_debug("compressed to 4 octets\n"); iphc1 |= LOWPAN_IPHC_DAM_10; /* second byte + the last three */ - *hc06_ptr = hdr->daddr.s6_addr[1]; - memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3); - hc06_ptr += 4; + lowpan_push_hc_data(&hc_ptr, + &hdr->daddr.s6_addr[1], 1); + lowpan_push_hc_data(&hc_ptr, + &hdr->daddr.s6_addr[13], 3); } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { pr_debug("compressed to 6 octets\n"); iphc1 |= LOWPAN_IPHC_DAM_01; /* second byte + the last five */ - *hc06_ptr = hdr->daddr.s6_addr[1]; - memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5); - hc06_ptr += 6; + lowpan_push_hc_data(&hc_ptr, + &hdr->daddr.s6_addr[1], 1); + lowpan_push_hc_data(&hc_ptr, + &hdr->daddr.s6_addr[11], 5); } else { pr_debug("using full address\n"); iphc1 |= LOWPAN_IPHC_DAM_00; - memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16); - hc06_ptr += 16; + lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16); } } else { - /* TODO: context lookup */ - if (is_addr_link_local(&hdr->daddr)) { - iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + if (addr_type & IPV6_ADDR_LINKLOCAL) { + /* TODO: context lookup */ + iphc1 |= lowpan_compress_addr_64(&hc_ptr, LOWPAN_IPHC_DAM_BIT, &hdr->daddr, _daddr); pr_debug("dest address unicast link-local %pI6c " - "iphc1 0x%02x\n", &hdr->daddr, iphc1); + "iphc1 0x%02x\n", &hdr->daddr, iphc1); } else { pr_debug("dest address unicast %pI6c\n", &hdr->daddr); - memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16); - hc06_ptr += 16; + lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16); } } /* UDP header compression */ if (hdr->nexthdr == UIP_PROTO_UDP) - compress_udp_header(&hc06_ptr, skb); + compress_udp_header(&hc_ptr, skb); head[0] = iphc0; head[1] = iphc1; skb_pull(skb, sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); - memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); + memcpy(skb_push(skb, hc_ptr - head), head, hc_ptr - head); skb_reset_network_header(skb); - pr_debug("header len %d skb %u\n", (int)(hc06_ptr - head), skb->len); + pr_debug("header len %d skb %u\n", (int)(hc_ptr - head), skb->len); raw_dump_table(__func__, "raw skb data dump compressed", - skb->data, skb->len); + skb->data, skb->len); return 0; } EXPORT_SYMBOL_GPL(lowpan_header_compress); diff --git a/net/802/fc.c b/net/802/fc.c index 05eea6b98bb8..7c174b6750cd 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -126,6 +126,6 @@ static void fc_setup(struct net_device *dev) */ struct net_device *alloc_fcdev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "fc%d", fc_setup); + return alloc_netdev(sizeof_priv, "fc%d", NET_NAME_UNKNOWN, fc_setup); } EXPORT_SYMBOL(alloc_fcdev); diff --git a/net/802/fddi.c b/net/802/fddi.c index 9cda40661e0d..59e7346f1193 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -207,7 +207,8 @@ static void fddi_setup(struct net_device *dev) */ struct net_device *alloc_fddidev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup); + return alloc_netdev(sizeof_priv, "fddi%d", NET_NAME_UNKNOWN, + fddi_setup); } EXPORT_SYMBOL(alloc_fddidev); diff --git a/net/802/hippi.c b/net/802/hippi.c index 5ff2a718ddca..2e03f8259dd5 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -228,7 +228,8 @@ static void hippi_setup(struct net_device *dev) struct net_device *alloc_hippi_dev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "hip%d", hippi_setup); + return alloc_netdev(sizeof_priv, "hip%d", NET_NAME_UNKNOWN, + hippi_setup); } EXPORT_SYMBOL(alloc_hippi_dev); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 175273f38cb1..64c6bed4a3d3 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -169,6 +169,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; + vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; @@ -249,7 +250,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } - new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup); + new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, + NET_NAME_UNKNOWN, vlan_setup); if (new_dev == NULL) return -ENOBUFS; @@ -323,23 +325,24 @@ static void vlan_transfer_features(struct net_device *dev, netdev_update_features(vlandev); } -static void __vlan_device_event(struct net_device *dev, unsigned long event) +static int __vlan_device_event(struct net_device *dev, unsigned long event) { + int err = 0; + switch (event) { case NETDEV_CHANGENAME: vlan_proc_rem_dev(dev); - if (vlan_proc_add_dev(dev) < 0) - pr_warn("failed to change proc name for %s\n", - dev->name); + err = vlan_proc_add_dev(dev); break; case NETDEV_REGISTER: - if (vlan_proc_add_dev(dev) < 0) - pr_warn("failed to add proc entry for %s\n", dev->name); + err = vlan_proc_add_dev(dev); break; case NETDEV_UNREGISTER: vlan_proc_rem_dev(dev); break; } + + return err; } static int vlan_device_event(struct notifier_block *unused, unsigned long event, @@ -354,8 +357,12 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, bool last = false; LIST_HEAD(list); - if (is_vlan_dev(dev)) - __vlan_device_event(dev, event); + if (is_vlan_dev(dev)) { + int err = __vlan_device_event(dev, event); + + if (err) + return notifier_from_errno(err); + } if ((event == NETDEV_UP) && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 3c32bd257b73..75d427763992 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -63,7 +63,7 @@ bool vlan_do_receive(struct sk_buff **skbp) } /* Must be invoked with rcu_read_lock. */ -struct net_device *__vlan_find_dev_deep(struct net_device *dev, +struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev, __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); @@ -81,13 +81,13 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev, upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - return __vlan_find_dev_deep(upper_dev, + return __vlan_find_dev_deep_rcu(upper_dev, vlan_proto, vlan_id); } return NULL; } -EXPORT_SYMBOL(__vlan_find_dev_deep); +EXPORT_SYMBOL(__vlan_find_dev_deep_rcu); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { @@ -114,8 +114,11 @@ EXPORT_SYMBOL(vlan_dev_vlan_proto); static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (skb_cow(skb, skb_headroom(skb)) < 0) + if (skb_cow(skb, skb_headroom(skb)) < 0) { + kfree_skb(skb); return NULL; + } + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 733ec283ed1b..35a6b6b15e8a 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -385,6 +385,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: + case SIOCSHWTSTAMP: + case SIOCGHWTSTAMP: if (netif_device_present(real_dev) && ops->ndo_do_ioctl) err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); break; @@ -493,48 +495,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) } } -static int vlan_calculate_locking_subclass(struct net_device *real_dev) -{ - int subclass = 0; - - while (is_vlan_dev(real_dev)) { - subclass++; - real_dev = vlan_dev_priv(real_dev)->real_dev; - } - - return subclass; -} - -static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0, subclass; - - subclass = vlan_calculate_locking_subclass(to); - - spin_lock_nested(&to->addr_list_lock, subclass); - err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - spin_unlock(&to->addr_list_lock); -} - -static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0, subclass; - - subclass = vlan_calculate_locking_subclass(to); - - spin_lock_nested(&to->addr_list_lock, subclass); - err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - spin_unlock(&to->addr_list_lock); -} - static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { - vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); - vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } /* @@ -562,6 +526,11 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); } +static int vlan_dev_get_lock_subclass(struct net_device *dev) +{ + return vlan_dev_priv(dev)->nest_level; +} + static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .rebuild = vlan_dev_rebuild_header, @@ -597,7 +566,6 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0; netif_carrier_off(dev); @@ -646,8 +614,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - subclass = vlan_calculate_locking_subclass(dev); - vlan_dev_set_lockdep_class(dev, subclass); + vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev)); vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan_dev_priv(dev)->vlan_pcpu_stats) @@ -662,8 +629,6 @@ static void vlan_dev_uninit(struct net_device *dev) struct vlan_dev_priv *vlan = vlan_dev_priv(dev); int i; - free_percpu(vlan->vlan_pcpu_stats); - vlan->vlan_pcpu_stats = NULL; for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { while ((pm = vlan->egress_priority_map[i]) != NULL) { vlan->egress_priority_map[i] = pm->next; @@ -678,9 +643,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; netdev_features_t old_features = features; - features &= real_dev->vlan_features; + features = netdev_intersect_features(features, real_dev->vlan_features); features |= NETIF_F_RXCSUM; - features &= real_dev->features; + features = netdev_intersect_features(features, real_dev->features); features |= old_features & NETIF_F_SOFT_FEATURES; features |= NETIF_F_LLTX; @@ -706,38 +671,36 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { + struct vlan_pcpu_stats *p; + u32 rx_errors = 0, tx_dropped = 0; + int i; - if (vlan_dev_priv(dev)->vlan_pcpu_stats) { - struct vlan_pcpu_stats *p; - u32 rx_errors = 0, tx_dropped = 0; - int i; - - for_each_possible_cpu(i) { - u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; - unsigned int start; - - p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); - do { - start = u64_stats_fetch_begin_irq(&p->syncp); - rxpackets = p->rx_packets; - rxbytes = p->rx_bytes; - rxmulticast = p->rx_multicast; - txpackets = p->tx_packets; - txbytes = p->tx_bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); - - stats->rx_packets += rxpackets; - stats->rx_bytes += rxbytes; - stats->multicast += rxmulticast; - stats->tx_packets += txpackets; - stats->tx_bytes += txbytes; - /* rx_errors & tx_dropped are u32 */ - rx_errors += p->rx_errors; - tx_dropped += p->tx_dropped; - } - stats->rx_errors = rx_errors; - stats->tx_dropped = tx_dropped; + for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; + unsigned int start; + + p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + do { + start = u64_stats_fetch_begin_irq(&p->syncp); + rxpackets = p->rx_packets; + rxbytes = p->rx_bytes; + rxmulticast = p->rx_multicast; + txpackets = p->tx_packets; + txbytes = p->tx_bytes; + } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->multicast += rxmulticast; + stats->tx_packets += txpackets; + stats->tx_bytes += txbytes; + /* rx_errors & tx_dropped are u32 */ + rx_errors += p->rx_errors; + tx_dropped += p->tx_dropped; } + stats->rx_errors = rx_errors; + stats->tx_dropped = tx_dropped; + return stats; } @@ -819,8 +782,18 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, }; +static void vlan_dev_free(struct net_device *dev) +{ + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + + free_percpu(vlan->vlan_pcpu_stats); + vlan->vlan_pcpu_stats = NULL; + free_netdev(dev); +} + void vlan_setup(struct net_device *dev) { ether_setup(dev); @@ -830,7 +803,7 @@ void vlan_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->netdev_ops = &vlan_netdev_ops; - dev->destructor = free_netdev; + dev->destructor = vlan_dev_free; dev->ethtool_ops = &vlan_ethtool_ops; memset(dev->broadcast, 0, ETH_ALEN); diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 1d0e89213a28..ae63cf72a953 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -171,6 +171,8 @@ int vlan_proc_add_dev(struct net_device *vlandev) struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id); + if (!strcmp(vlandev->name, name_conf)) + return -EINVAL; vlan->dent = proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR, vn->proc_vlan_dir, &vlandev_fops, vlandev); diff --git a/net/9p/client.c b/net/9p/client.c index 0004cbaac4a4..e86a9bea1d16 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -959,7 +959,6 @@ static int p9_client_version(struct p9_client *c) break; default: return -EINVAL; - break; } if (IS_ERR(req)) diff --git a/net/Kconfig b/net/Kconfig index d92afe4204d9..4051fdfa4367 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -214,6 +214,7 @@ source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" source "net/phonet/Kconfig" +source "net/6lowpan/Kconfig" source "net/ieee802154/Kconfig" source "net/mac802154/Kconfig" source "net/sched/Kconfig" diff --git a/net/Makefile b/net/Makefile index cbbbe6d657ca..7ed1970074b0 100644 --- a/net/Makefile +++ b/net/Makefile @@ -57,7 +57,8 @@ obj-$(CONFIG_CAIF) += caif/ ifneq ($(CONFIG_DCB),) obj-y += dcb/ endif -obj-y += ieee802154/ +obj-$(CONFIG_6LOWPAN) += 6lowpan/ +obj-$(CONFIG_IEEE802154) += ieee802154/ obj-$(CONFIG_MAC802154) += mac802154/ ifeq ($(CONFIG_NET),y) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 786ee2f83d5f..c00897f65a31 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, goto drop; /* Queue packet (standard) */ - skb->sk = sock; - if (sock_queue_rcv_skb(sock, skb) < 0) goto drop; @@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (!skb) goto out; - skb->sk = sk; skb_reserve(skb, ddp_dl->header_length); skb_reserve(skb, dev->hard_header_len); skb->dev = dev; @@ -1669,7 +1666,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr goto out; } - if (sk->sk_no_check == 1) + if (sk->sk_no_check_tx) ddp->deh_sum = 0; else ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp)); @@ -1808,7 +1805,7 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) long amount = 0; if (skb) - amount = skb->len - sizeof(struct ddpehdr); + amount = skb->len - sizeof(struct ddpehdr); rc = put_user(amount, (int __user *)argp); break; } diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c index 6c8016f61866..e4158b8b926d 100644 --- a/net/appletalk/dev.c +++ b/net/appletalk/dev.c @@ -39,6 +39,7 @@ static void ltalk_setup(struct net_device *dev) struct net_device *alloc_ltalkdev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "lt%d", ltalk_setup); + return alloc_netdev(sizeof_priv, "lt%d", NET_NAME_UNKNOWN, + ltalk_setup); } EXPORT_SYMBOL(alloc_ltalkdev); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 403e71fa88fe..cc78538d163b 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -682,8 +682,8 @@ static int br2684_create(void __user *arg) netdev = alloc_netdev(sizeof(struct br2684_dev), ni.ifname[0] ? ni.ifname : "nas%d", - (payload == p_routed) ? - br2684_setup_routed : br2684_setup); + NET_NAME_UNKNOWN, + (payload == p_routed) ? br2684_setup_routed : br2684_setup); if (!netdev) return -ENOMEM; diff --git a/net/atm/clip.c b/net/atm/clip.c index ba291ce4bdff..46339040fef0 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -520,7 +520,8 @@ static int clip_create(int number) if (PRIV(dev)->number >= number) number = PRIV(dev)->number + 1; } - dev = alloc_netdev(sizeof(struct clip_priv), "", clip_setup); + dev = alloc_netdev(sizeof(struct clip_priv), "", NET_NAME_UNKNOWN, + clip_setup); if (!dev) return -ENOMEM; clip_priv = PRIV(dev); diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 8c93267ce969..c4e09846d1de 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -252,7 +252,7 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size) * we need to ensure there's a memory barrier after it. The bit * *must* be set before we do the atomic_inc() on pvcc->inflight. * There's no smp_mb__after_set_bit(), so it's this or abuse - * smp_mb__after_clear_bit(). + * smp_mb__after_atomic(). */ test_and_set_bit(BLOCKED, &pvcc->blocked); diff --git a/net/atm/svc.c b/net/atm/svc.c index 1281049c135f..d8e5d0c2ebbc 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -263,17 +263,11 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, goto out; } } -/* - * Not supported yet - * - * #ifndef CONFIG_SINGLE_SIGITF - */ + vcc->qos.txtp.max_pcr = SELECT_TOP_PCR(vcc->qos.txtp); vcc->qos.txtp.pcr = 0; vcc->qos.txtp.min_pcr = 0; -/* - * #endif - */ + error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci); if (!error) sock->state = SS_CONNECTED; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b3bd4ec3fd94..1e8053976e83 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -108,14 +108,15 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, int max_if_num) { void *data_ptr; - size_t data_size, old_size; + size_t old_size; int ret = -ENOMEM; spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); - data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS; old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS; - data_ptr = kmalloc(data_size, GFP_ATOMIC); + data_ptr = kmalloc_array(max_if_num, + BATADV_NUM_WORDS * sizeof(unsigned long), + GFP_ATOMIC); if (!data_ptr) goto unlock; @@ -123,7 +124,7 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, kfree(orig_node->bat_iv.bcast_own); orig_node->bat_iv.bcast_own = data_ptr; - data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); + data_ptr = kmalloc_array(max_if_num, sizeof(uint8_t), GFP_ATOMIC); if (!data_ptr) { kfree(orig_node->bat_iv.bcast_own); goto unlock; @@ -164,7 +165,7 @@ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, goto free_bcast_own; chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS; - data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC); + data_ptr = kmalloc_array(max_if_num, chunk_size, GFP_ATOMIC); if (!data_ptr) goto unlock; @@ -183,7 +184,7 @@ free_bcast_own: if (max_if_num == 0) goto free_own_sum; - data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); + data_ptr = kmalloc_array(max_if_num, sizeof(uint8_t), GFP_ATOMIC); if (!data_ptr) { kfree(orig_node->bat_iv.bcast_own); goto unlock; @@ -1545,6 +1546,8 @@ out_neigh: if ((orig_neigh_node) && (!is_single_hop_neigh)) batadv_orig_node_free_ref(orig_neigh_node); out: + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); if (router) batadv_neigh_node_free_ref(router); if (router_router) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 6f0d9ec37950..a957c8140721 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -800,11 +800,6 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, bla_dst = (struct batadv_bla_claim_dst *)hw_dst; bla_dst_own = &bat_priv->bla.claim_dest; - /* check if it is a claim packet in general */ - if (memcmp(bla_dst->magic, bla_dst_own->magic, - sizeof(bla_dst->magic)) != 0) - return 0; - /* if announcement packet, use the source, * otherwise assume it is in the hw_src */ @@ -866,12 +861,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct sk_buff *skb) { - struct batadv_bla_claim_dst *bla_dst; + struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; uint8_t *hw_src, *hw_dst; - struct vlan_ethhdr *vhdr; + struct vlan_hdr *vhdr, vhdr_buf; struct ethhdr *ethhdr; struct arphdr *arphdr; unsigned short vid; + int vlan_depth = 0; __be16 proto; int headlen; int ret; @@ -882,9 +878,24 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, proto = ethhdr->h_proto; headlen = ETH_HLEN; if (vid & BATADV_VLAN_HAS_TAG) { - vhdr = vlan_eth_hdr(skb); - proto = vhdr->h_vlan_encapsulated_proto; - headlen += VLAN_HLEN; + /* Traverse the VLAN/Ethertypes. + * + * At this point it is known that the first protocol is a VLAN + * header, so start checking at the encapsulated protocol. + * + * The depth of the VLAN headers is recorded to drop BLA claim + * frames encapsulated into multiple VLAN headers (QinQ). + */ + do { + vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN, + &vhdr_buf); + if (!vhdr) + return 0; + + proto = vhdr->h_vlan_encapsulated_proto; + headlen += VLAN_HLEN; + vlan_depth++; + } while (proto == htons(ETH_P_8021Q)); } if (proto != htons(ETH_P_ARP)) @@ -914,6 +925,19 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, hw_src = (uint8_t *)arphdr + sizeof(struct arphdr); hw_dst = hw_src + ETH_ALEN + 4; bla_dst = (struct batadv_bla_claim_dst *)hw_dst; + bla_dst_own = &bat_priv->bla.claim_dest; + + /* check if it is a claim frame in general */ + if (memcmp(bla_dst->magic, bla_dst_own->magic, + sizeof(bla_dst->magic)) != 0) + return 0; + + /* check if there is a claim frame encapsulated deeper in (QinQ) and + * drop that, as this is not supported by BLA but should also not be + * sent via the mesh. + */ + if (vlan_depth > 1) + return 1; /* check if it is a claim frame. */ ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index b758881be108..a12e25efaf6f 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -245,6 +245,7 @@ static int batadv_algorithms_open(struct inode *inode, struct file *file) static int batadv_originators_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_orig_seq_print_text, net_dev); } @@ -258,18 +259,21 @@ static int batadv_originators_hardif_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_orig_hardif_seq_print_text, net_dev); } static int batadv_gateways_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_gw_client_seq_print_text, net_dev); } static int batadv_transtable_global_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_tt_global_seq_print_text, net_dev); } @@ -277,6 +281,7 @@ static int batadv_transtable_global_open(struct inode *inode, struct file *file) static int batadv_bla_claim_table_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_bla_claim_table_seq_print_text, net_dev); } @@ -285,6 +290,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_bla_backbone_table_seq_print_text, net_dev); } @@ -300,6 +306,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode, static int batadv_dat_cache_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_dat_cache_seq_print_text, net_dev); } #endif @@ -307,6 +314,7 @@ static int batadv_dat_cache_open(struct inode *inode, struct file *file) static int batadv_transtable_local_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_tt_local_seq_print_text, net_dev); } @@ -319,6 +327,7 @@ struct batadv_debuginfo { static int batadv_nc_nodes_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_nc_nodes_seq_print_text, net_dev); } #endif @@ -333,7 +342,7 @@ struct batadv_debuginfo batadv_debuginfo_##_name = { \ .llseek = seq_lseek, \ .release = single_release, \ } \ -}; +} /* the following attributes are general and therefore they will be directly * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs @@ -395,7 +404,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ .llseek = seq_lseek, \ .release = single_release, \ }, \ -}; +} static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, batadv_originators_hardif_open); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b25fd64d727b..b5981113c9a7 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -537,7 +537,8 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) if (!bat_priv->orig_hash) return NULL; - res = kmalloc(BATADV_DAT_CANDIDATES_NUM * sizeof(*res), GFP_ATOMIC); + res = kmalloc_array(BATADV_DAT_CANDIDATES_NUM, sizeof(*res), + GFP_ATOMIC); if (!res) return NULL; @@ -594,7 +595,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, if (!neigh_node) goto free_orig; - tmp_skb = pskb_copy(skb, GFP_ATOMIC); + tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb, cand[i].orig_node, packet_subtype)) { @@ -662,6 +663,7 @@ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv) void batadv_dat_status_update(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); + batadv_dat_tvlv_container_update(bat_priv); } @@ -940,8 +942,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, * additional DAT answer may trigger kernel warnings about * a packet coming from the wrong port. */ - if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, - BATADV_NO_FLAGS)) { + if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, vid)) { ret = true; goto out; } diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index bcc4bea632fa..022d18ab27a6 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -128,6 +128,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, { struct batadv_frag_table_entry *chain; struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr; + struct batadv_frag_list_entry *frag_entry_last = NULL; struct batadv_frag_packet *frag_packet; uint8_t bucket; uint16_t seqno, hdr_size = sizeof(struct batadv_frag_packet); @@ -180,11 +181,14 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, ret = true; goto out; } + + /* store current entry because it could be the last in list */ + frag_entry_last = frag_entry_curr; } - /* Reached the end of the list, so insert after 'frag_entry_curr'. */ - if (likely(frag_entry_curr)) { - hlist_add_after(&frag_entry_curr->list, &frag_entry_new->list); + /* Reached the end of the list, so insert after 'frag_entry_last'. */ + if (likely(frag_entry_last)) { + hlist_add_after(&frag_entry_last->list, &frag_entry_new->list); chain->size += skb->len - hdr_size; chain->timestamp = jiffies; ret = true; @@ -418,12 +422,13 @@ bool batadv_frag_send_packet(struct sk_buff *skb, struct batadv_neigh_node *neigh_node) { struct batadv_priv *bat_priv; - struct batadv_hard_iface *primary_if; + struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; unsigned mtu = neigh_node->if_incoming->net_dev->mtu; unsigned header_size = sizeof(frag_header); unsigned max_fragment_size, max_packet_size; + bool ret = false; /* To avoid merge and refragmentation at next-hops we never send * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE @@ -483,7 +488,11 @@ bool batadv_frag_send_packet(struct sk_buff *skb, skb->len + ETH_HLEN); batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); - return true; + ret = true; + out_err: - return false; + if (primary_if) + batadv_hardif_free_ref(primary_if); + + return ret; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index c835e137423b..90cff585b37d 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -42,8 +42,10 @@ static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) { - if (atomic_dec_and_test(&gw_node->refcount)) + if (atomic_dec_and_test(&gw_node->refcount)) { + batadv_orig_node_free_ref(gw_node->orig_node); kfree_rcu(gw_node, rcu); + } } static struct batadv_gw_node * @@ -406,9 +408,14 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (gateway->bandwidth_down == 0) return; + if (!atomic_inc_not_zero(&orig_node->refcount)) + return; + gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); - if (!gw_node) + if (!gw_node) { + batadv_orig_node_free_ref(orig_node); return; + } INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b851cc580853..fbda6b54baff 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -83,7 +83,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return true; /* no more parents..stop recursion */ - if (net_dev->iflink == net_dev->ifindex) + if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex) return false; /* recurse over the parent device */ diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 63bdf7e94f1e..7c1c63080e20 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -46,12 +46,12 @@ struct batadv_hashtable *batadv_hash_new(uint32_t size) if (!hash) return NULL; - hash->table = kmalloc(sizeof(*hash->table) * size, GFP_ATOMIC); + hash->table = kmalloc_array(size, sizeof(*hash->table), GFP_ATOMIC); if (!hash->table) goto free_hash; - hash->list_locks = kmalloc(sizeof(*hash->list_locks) * size, - GFP_ATOMIC); + hash->list_locks = kmalloc_array(size, sizeof(*hash->list_locks), + GFP_ATOMIC); if (!hash->list_locks) goto free_table; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 770dc890ceef..a1fcd884f0b1 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2014.2.0" +#define BATADV_SOURCE_VERSION "2014.4.0" #endif /* B.A.T.M.A.N. parameters */ @@ -238,21 +238,29 @@ enum batadv_dbg_level { int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) __printf(2, 3); -#define batadv_dbg(type, bat_priv, fmt, arg...) \ +/* possibly ratelimited debug output */ +#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ do { \ - if (atomic_read(&bat_priv->log_level) & type) \ + if (atomic_read(&bat_priv->log_level) & type && \ + (!ratelimited || net_ratelimit())) \ batadv_debug_log(bat_priv, fmt, ## arg);\ } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ -__printf(3, 4) -static inline void batadv_dbg(int type __always_unused, - struct batadv_priv *bat_priv __always_unused, - const char *fmt __always_unused, ...) +__printf(4, 5) +static inline void _batadv_dbg(int type __always_unused, + struct batadv_priv *bat_priv __always_unused, + int ratelimited __always_unused, + const char *fmt __always_unused, ...) { } #endif +#define batadv_dbg(type, bat_priv, arg...) \ + _batadv_dbg(type, bat_priv, 0, ## arg) +#define batadv_dbg_ratelimited(type, bat_priv, arg...) \ + _batadv_dbg(type, bat_priv, 1, ## arg) + #define batadv_info(net_dev, fmt, arg...) \ do { \ struct net_device *_netdev = (net_dev); \ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 8c7ca811de6e..96b66fd30f96 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -415,7 +415,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_ipv4_list, mcast_want_all_ipv4_node) { - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; @@ -442,7 +442,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_ipv6_list, mcast_want_all_ipv6_node) { - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; @@ -493,7 +493,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_unsnoopables_list, mcast_want_all_unsnoopables_node) { - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index a9546fe541eb..8d04d174669e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -86,6 +86,7 @@ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) void batadv_nc_status_update(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); + batadv_nc_tvlv_container_update(bat_priv); } @@ -1343,7 +1344,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, struct ethhdr *ethhdr; /* Copy skb header to change the mac header */ - skb = pskb_copy(skb, GFP_ATOMIC); + skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!skb) return; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ffd9dfbd9b0e..6a484514cd3e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -501,12 +501,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) { struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_neigh_node *router; orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + /* this is the last reference to this object */ + router = rcu_dereference_protected(orig_ifinfo->router, true); + if (router) + batadv_neigh_node_free_ref_now(router); kfree(orig_ifinfo); } @@ -702,6 +707,47 @@ free_orig_node: } /** + * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor + * @bat_priv: the bat priv with all the soft interface information + * @neigh: orig node which is to be checked + */ +static void +batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, + struct batadv_neigh_node *neigh) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo; + struct batadv_hard_iface *if_outgoing; + struct hlist_node *node_tmp; + + spin_lock_bh(&neigh->ifinfo_lock); + + /* for all ifinfo objects for this neighinator */ + hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, + &neigh->ifinfo_list, list) { + if_outgoing = neigh_ifinfo->if_outgoing; + + /* always keep the default interface */ + if (if_outgoing == BATADV_IF_DEFAULT) + continue; + + /* don't purge if the interface is not (going) down */ + if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && + (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && + (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + continue; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "neighbor/ifinfo purge: neighbor %pM, iface: %s\n", + neigh->addr, if_outgoing->net_dev->name); + + hlist_del_rcu(&neigh_ifinfo->list); + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + } + + spin_unlock_bh(&neigh->ifinfo_lock); +} + +/** * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked @@ -800,6 +846,11 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, hlist_del_rcu(&neigh_node->list); batadv_neigh_node_free_ref(neigh_node); + } else { + /* only necessary if not the whole neighbor is to be + * deleted, but some interface has been removed. + */ + batadv_purge_neigh_ifinfo(bat_priv, neigh_node); } } @@ -857,7 +908,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, { struct batadv_neigh_node *best_neigh_node; struct batadv_hard_iface *hard_iface; - bool changed; + bool changed_ifinfo, changed_neigh; if (batadv_has_timed_out(orig_node->last_seen, 2 * BATADV_PURGE_TIMEOUT)) { @@ -867,10 +918,10 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, jiffies_to_msecs(orig_node->last_seen)); return true; } - changed = batadv_purge_orig_ifinfo(bat_priv, orig_node); - changed = changed || batadv_purge_orig_neighbors(bat_priv, orig_node); + changed_ifinfo = batadv_purge_orig_ifinfo(bat_priv, orig_node); + changed_neigh = batadv_purge_orig_neighbors(bat_priv, orig_node); - if (!changed) + if (!changed_ifinfo && !changed_neigh) return false; /* first for NULL ... */ @@ -1028,7 +1079,8 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface); out: - batadv_hardif_free_ref(hard_iface); + if (hard_iface) + batadv_hardif_free_ref(hard_iface); return 0; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 35141534938e..35f76f2f7824 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -706,11 +706,11 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) { if (batadv_reroute_unicast_packet(bat_priv, unicast_packet, ethhdr->h_dest, vid)) - net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, - bat_priv, - "Rerouting unicast packet to %pM (dst=%pM): Local Roaming\n", - unicast_packet->dest, - ethhdr->h_dest); + batadv_dbg_ratelimited(BATADV_DBG_TT, + bat_priv, + "Rerouting unicast packet to %pM (dst=%pM): Local Roaming\n", + unicast_packet->dest, + ethhdr->h_dest); /* at this point the mesh destination should have been * substituted with the originator address found in the global * table. If not, let the packet go untouched anyway because @@ -752,10 +752,10 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, */ if (batadv_reroute_unicast_packet(bat_priv, unicast_packet, ethhdr->h_dest, vid)) { - net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv, - "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n", - unicast_packet->dest, ethhdr->h_dest, - old_ttvn, curr_ttvn); + batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv, + "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n", + unicast_packet->dest, ethhdr->h_dest, + old_ttvn, curr_ttvn); return 1; } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 744a59b85e15..5467955eb27c 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -448,10 +448,15 @@ out: * possibly free it * @softif_vlan: the vlan object to release */ -void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan) +void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) { - if (atomic_dec_and_test(&softif_vlan->refcount)) - kfree_rcu(softif_vlan, rcu); + if (atomic_dec_and_test(&vlan->refcount)) { + spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); + hlist_del_rcu(&vlan->list); + spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock); + + kfree_rcu(vlan, rcu); + } } /** @@ -505,6 +510,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) if (!vlan) return -ENOMEM; + vlan->bat_priv = bat_priv; vlan->vid = vid; atomic_set(&vlan->refcount, 1); @@ -516,6 +522,10 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) return err; } + spin_lock_bh(&bat_priv->softif_vlan_list_lock); + hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ @@ -523,10 +533,6 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) bat_priv->soft_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - return 0; } @@ -538,18 +544,13 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, struct batadv_softif_vlan *vlan) { - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_del_rcu(&vlan->list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - - batadv_sysfs_del_vlan(bat_priv, vlan); - /* explicitly remove the associated TT local entry because it is marked * with the NOPURGE flag */ batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); + batadv_sysfs_del_vlan(bat_priv, vlan); batadv_softif_vlan_free_ref(vlan); } @@ -567,6 +568,8 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); + struct batadv_softif_vlan *vlan; + int ret; /* only 802.1Q vlans are supported. * batman-adv does not know how to handle other types @@ -576,7 +579,36 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, vid |= BATADV_VLAN_HAS_TAG; - return batadv_softif_create_vlan(bat_priv, vid); + /* if a new vlan is getting created and it already exists, it means that + * it was not deleted yet. batadv_softif_vlan_get() increases the + * refcount in order to revive the object. + * + * if it does not exist then create it. + */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan) + return batadv_softif_create_vlan(bat_priv, vid); + + /* recreate the sysfs object if it was already destroyed (and it should + * be since we received a kill_vid() for this vlan + */ + if (!vlan->kobj) { + ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); + if (ret) { + batadv_softif_vlan_free_ref(vlan); + return ret; + } + } + + /* add a new TT local entry. This one will be marked with the NOPURGE + * flag. This must be added again, even if the vlan object already + * exists, because the entry was deleted by kill_vid() + */ + batadv_tt_local_add(bat_priv->soft_iface, + bat_priv->soft_iface->dev_addr, vid, + BATADV_NULL_IFINDEX, BATADV_NO_MARK); + + return 0; } /** @@ -719,7 +751,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); - atomic_set(&bat_priv->hop_penalty, 15); + atomic_set(&bat_priv->hop_penalty, 30); #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); #endif @@ -884,7 +916,7 @@ static void batadv_softif_init_early(struct net_device *dev) /* generate random address */ eth_hw_addr_random(dev); - SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + dev->ethtool_ops = &batadv_ethtool_ops; memset(priv, 0, sizeof(*priv)); } @@ -895,7 +927,7 @@ struct net_device *batadv_softif_create(const char *name) int ret; soft_iface = alloc_netdev(sizeof(struct batadv_priv), name, - batadv_softif_init_early); + NET_NAME_UNKNOWN, batadv_softif_init_early); if (!soft_iface) return NULL; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 1ebb0d9e2ea5..f40cb0436eba 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -29,12 +29,14 @@ static struct net_device *batadv_kobj_to_netdev(struct kobject *obj) { struct device *dev = container_of(obj->parent, struct device, kobj); + return to_net_dev(dev); } static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj) { struct net_device *net_dev = batadv_kobj_to_netdev(obj); + return netdev_priv(net_dev); } @@ -106,7 +108,7 @@ struct batadv_attribute batadv_attr_vlan_##_name = { \ .mode = _mode }, \ .show = _show, \ .store = _store, \ -}; +} /* Use this, if you have customized show and store functions */ #define BATADV_ATTR(_name, _mode, _show, _store) \ @@ -115,7 +117,7 @@ struct batadv_attribute batadv_attr_##_name = { \ .mode = _mode }, \ .show = _show, \ .store = _store, \ -}; +} #define BATADV_ATTR_SIF_STORE_BOOL(_name, _post_func) \ ssize_t batadv_store_##_name(struct kobject *kobj, \ @@ -124,6 +126,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + \ return __batadv_store_bool_attr(buff, count, _post_func, attr, \ &bat_priv->_name, net_dev); \ } @@ -133,6 +136,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ struct attribute *attr, char *buff) \ { \ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \ + \ return sprintf(buff, "%s\n", \ atomic_read(&bat_priv->_name) == 0 ? \ "disabled" : "enabled"); \ @@ -155,6 +159,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + \ return __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ &bat_priv->_name, net_dev); \ @@ -165,6 +170,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ struct attribute *attr, char *buff) \ { \ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \ + \ return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name)); \ } \ @@ -188,6 +194,7 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \ size_t res = __batadv_store_bool_attr(buff, count, _post_func, \ attr, &vlan->_name, \ bat_priv->soft_iface); \ + \ batadv_softif_vlan_free_ref(vlan); \ return res; \ } @@ -202,6 +209,7 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \ size_t res = sprintf(buff, "%s\n", \ atomic_read(&vlan->_name) == 0 ? \ "disabled" : "enabled"); \ + \ batadv_softif_vlan_free_ref(vlan); \ return res; \ } @@ -324,12 +332,14 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj, struct attribute *attr, char *buff) { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name); } static void batadv_post_gw_reselect(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); + batadv_gw_reselect(bat_priv); } @@ -890,32 +900,24 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, bat_kobj = &bat_priv->soft_iface->dev.kobj; - uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) + - strlen(batadv_uev_type_str[type]) + 1, - GFP_ATOMIC); + uevent_env[0] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_TYPE_VAR, + batadv_uev_type_str[type]); if (!uevent_env[0]) goto out; - sprintf(uevent_env[0], "%s%s", BATADV_UEV_TYPE_VAR, - batadv_uev_type_str[type]); - - uevent_env[1] = kmalloc(strlen(BATADV_UEV_ACTION_VAR) + - strlen(batadv_uev_action_str[action]) + 1, - GFP_ATOMIC); + uevent_env[1] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_ACTION_VAR, + batadv_uev_action_str[action]); if (!uevent_env[1]) goto out; - sprintf(uevent_env[1], "%s%s", BATADV_UEV_ACTION_VAR, - batadv_uev_action_str[action]); - /* If the event is DEL, ignore the data field */ if (action != BATADV_UEV_DEL) { - uevent_env[2] = kmalloc(strlen(BATADV_UEV_DATA_VAR) + - strlen(data) + 1, GFP_ATOMIC); + uevent_env[2] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_DATA_VAR, data); if (!uevent_env[2]) goto out; - - sprintf(uevent_env[2], "%s%s", BATADV_UEV_DATA_VAR, data); } ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d636bde72c9a..5f59e7f899a0 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -511,6 +511,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global = NULL; + struct batadv_softif_vlan *vlan; struct net_device *in_dev = NULL; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry; @@ -572,6 +573,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (!tt_local) goto out; + /* increase the refcounter of the related vlan */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", addr, BATADV_PRINT_VID(vid), @@ -604,6 +608,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_local_entry_free_ref(tt_local); + batadv_softif_vlan_free_ref(vlan); goto out; } @@ -1009,6 +1014,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, { struct batadv_tt_local_entry *tt_local_entry; uint16_t flags, curr_flags = BATADV_NO_FLAGS; + struct batadv_softif_vlan *vlan; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1039,6 +1045,11 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, hlist_del_rcu(&tt_local_entry->common.hash_entry); batadv_tt_local_entry_free_ref(tt_local_entry); + /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + out: if (tt_local_entry) batadv_tt_local_entry_free_ref(tt_local_entry); @@ -1111,6 +1122,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; + struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -1131,6 +1143,13 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); + + /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, + tt_common_entry->vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + batadv_tt_local_entry_free_ref(tt_local); } spin_unlock_bh(list_lock); @@ -3139,6 +3158,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; + struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -3167,6 +3187,12 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) tt_local = container_of(tt_common, struct batadv_tt_local_entry, common); + + /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + batadv_tt_local_entry_free_ref(tt_local); } spin_unlock_bh(list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 34891a56773f..8854c05622a9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -687,6 +687,7 @@ struct batadv_priv_nc { /** * struct batadv_softif_vlan - per VLAN attributes set + * @bat_priv: pointer to the mesh object * @vid: VLAN identifier * @kobj: kobject for sysfs vlan subdirectory * @ap_isolation: AP isolation state @@ -696,6 +697,7 @@ struct batadv_priv_nc { * @rcu: struct used for freeing in a RCU-safe manner */ struct batadv_softif_vlan { + struct batadv_priv *bat_priv; unsigned short vid; struct kobject *kobj; atomic_t ap_isolation; /* boolean */ diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 73492b91105a..206b65ccd5b8 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2013 Intel Corp. + Copyright (c) 2013-2014 Intel Corp. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 and @@ -14,6 +14,8 @@ #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/module.h> +#include <linux/debugfs.h> #include <net/ipv6.h> #include <net/ip6_route.h> @@ -25,16 +27,20 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> -#include "6lowpan.h" - #include <net/6lowpan.h> /* for the compression support */ +#define VERSION "0.1" + +static struct dentry *lowpan_psm_debugfs; +static struct dentry *lowpan_control_debugfs; + #define IFACE_NAME_TEMPLATE "bt%d" #define EUI64_ADDR_LEN 8 struct skb_cb { struct in6_addr addr; - struct l2cap_conn *conn; + struct l2cap_chan *chan; + int status; }; #define lowpan_cb(skb) ((struct skb_cb *)((skb)->cb)) @@ -48,9 +54,19 @@ struct skb_cb { static LIST_HEAD(bt_6lowpan_devices); static DEFINE_RWLOCK(devices_lock); +/* If psm is set to 0 (default value), then 6lowpan is disabled. + * Other values are used to indicate a Protocol Service Multiplexer + * value for 6lowpan. + */ +static u16 psm_6lowpan; + +/* We are listening incoming connections via this channel + */ +static struct l2cap_chan *listen_chan; + struct lowpan_peer { struct list_head list; - struct l2cap_conn *conn; + struct l2cap_chan *chan; /* peer addresses in various formats */ unsigned char eui64_addr[EUI64_ADDR_LEN]; @@ -84,6 +100,8 @@ static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer) { list_del(&peer->list); + module_put(THIS_MODULE); + if (atomic_dec_and_test(&dev->peer_count)) { BT_DBG("last peer"); return true; @@ -101,13 +119,26 @@ static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev, ba, type); list_for_each_entry_safe(peer, tmp, &dev->peers, list) { - BT_DBG("addr %pMR type %d", - &peer->conn->hcon->dst, peer->conn->hcon->dst_type); + BT_DBG("dst addr %pMR dst type %d", + &peer->chan->dst, peer->chan->dst_type); - if (bacmp(&peer->conn->hcon->dst, ba)) + if (bacmp(&peer->chan->dst, ba)) continue; - if (type == peer->conn->hcon->dst_type) + if (type == peer->chan->dst_type) + return peer; + } + + return NULL; +} + +static inline struct lowpan_peer *peer_lookup_chan(struct lowpan_dev *dev, + struct l2cap_chan *chan) +{ + struct lowpan_peer *peer, *tmp; + + list_for_each_entry_safe(peer, tmp, &dev->peers, list) { + if (peer->chan == chan) return peer; } @@ -120,7 +151,7 @@ static inline struct lowpan_peer *peer_lookup_conn(struct lowpan_dev *dev, struct lowpan_peer *peer, *tmp; list_for_each_entry_safe(peer, tmp, &dev->peers, list) { - if (peer->conn == conn) + if (peer->chan->conn == conn) return peer; } @@ -176,16 +207,16 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev) return -ENOMEM; ret = netif_rx(skb_cp); - - BT_DBG("receive skb %d", ret); - if (ret < 0) + if (ret < 0) { + BT_DBG("receive skb %d", ret); return NET_RX_DROP; + } return ret; } static int process_data(struct sk_buff *skb, struct net_device *netdev, - struct l2cap_conn *conn) + struct l2cap_chan *chan) { const u8 *saddr, *daddr; u8 iphc0, iphc1; @@ -196,7 +227,7 @@ static int process_data(struct sk_buff *skb, struct net_device *netdev, dev = lowpan_dev(netdev); read_lock_irqsave(&devices_lock, flags); - peer = peer_lookup_conn(dev, conn); + peer = peer_lookup_chan(dev, chan); read_unlock_irqrestore(&devices_lock, flags); if (!peer) goto drop; @@ -225,7 +256,7 @@ drop: } static int recv_pkt(struct sk_buff *skb, struct net_device *dev, - struct l2cap_conn *conn) + struct l2cap_chan *chan) { struct sk_buff *local_skb; int ret; @@ -269,7 +300,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, if (!local_skb) goto drop; - ret = process_data(local_skb, dev, conn); + ret = process_data(local_skb, dev, chan); if (ret != NET_RX_SUCCESS) goto drop; @@ -286,146 +317,44 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, return NET_RX_SUCCESS; drop: + dev->stats.rx_dropped++; kfree_skb(skb); return NET_RX_DROP; } /* Packet from BT LE device */ -int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb) +static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { struct lowpan_dev *dev; struct lowpan_peer *peer; int err; - peer = lookup_peer(conn); + peer = lookup_peer(chan->conn); if (!peer) return -ENOENT; - dev = lookup_dev(conn); + dev = lookup_dev(chan->conn); if (!dev || !dev->netdev) return -ENOENT; - err = recv_pkt(skb, dev->netdev, conn); - BT_DBG("recv pkt %d", err); - - return err; -} - -static inline int skbuff_copy(void *msg, int len, int count, int mtu, - struct sk_buff *skb, struct net_device *dev) -{ - struct sk_buff **frag; - int sent = 0; - - memcpy(skb_put(skb, count), msg, count); - - sent += count; - msg += count; - len -= count; - - dev->stats.tx_bytes += count; - dev->stats.tx_packets++; - - raw_dump_table(__func__, "Sending", skb->data, skb->len); - - /* Continuation fragments (no L2CAP header) */ - frag = &skb_shinfo(skb)->frag_list; - while (len > 0) { - struct sk_buff *tmp; - - count = min_t(unsigned int, mtu, len); - - tmp = bt_skb_alloc(count, GFP_ATOMIC); - if (!tmp) - return -ENOMEM; - - *frag = tmp; - - memcpy(skb_put(*frag, count), msg, count); - - raw_dump_table(__func__, "Sending fragment", - (*frag)->data, count); - - (*frag)->priority = skb->priority; - - sent += count; - msg += count; - len -= count; - - skb->len += (*frag)->len; - skb->data_len += (*frag)->len; - - frag = &(*frag)->next; - - dev->stats.tx_bytes += count; - dev->stats.tx_packets++; + err = recv_pkt(skb, dev->netdev, chan); + if (err) { + BT_DBG("recv pkt %d", err); + err = -EAGAIN; } - return sent; -} - -static struct sk_buff *create_pdu(struct l2cap_conn *conn, void *msg, - size_t len, u32 priority, - struct net_device *dev) -{ - struct sk_buff *skb; - int err, count; - struct l2cap_hdr *lh; - - /* FIXME: This mtu check should be not needed and atm is only used for - * testing purposes - */ - if (conn->mtu > (L2CAP_LE_MIN_MTU + L2CAP_HDR_SIZE)) - conn->mtu = L2CAP_LE_MIN_MTU + L2CAP_HDR_SIZE; - - count = min_t(unsigned int, (conn->mtu - L2CAP_HDR_SIZE), len); - - BT_DBG("conn %p len %zu mtu %d count %d", conn, len, conn->mtu, count); - - skb = bt_skb_alloc(count + L2CAP_HDR_SIZE, GFP_ATOMIC); - if (!skb) - return ERR_PTR(-ENOMEM); - - skb->priority = priority; - - lh = (struct l2cap_hdr *)skb_put(skb, L2CAP_HDR_SIZE); - lh->cid = cpu_to_le16(L2CAP_FC_6LOWPAN); - lh->len = cpu_to_le16(len); - - err = skbuff_copy(msg, len, count, conn->mtu, skb, dev); - if (unlikely(err < 0)) { - kfree_skb(skb); - BT_DBG("skbuff copy %d failed", err); - return ERR_PTR(err); - } - - return skb; + return err; } -static int conn_send(struct l2cap_conn *conn, - void *msg, size_t len, u32 priority, - struct net_device *dev) +static u8 get_addr_type_from_eui64(u8 byte) { - struct sk_buff *skb; - - skb = create_pdu(conn, msg, len, priority, dev); - if (IS_ERR(skb)) - return -EINVAL; - - BT_DBG("conn %p skb %p len %d priority %u", conn, skb, skb->len, - skb->priority); - - hci_send_acl(conn->hchan, skb, ACL_START); - - return 0; + /* Is universal(0) or local(1) bit */ + return ((byte & 0x02) ? BDADDR_LE_RANDOM : BDADDR_LE_PUBLIC); } -static void get_dest_bdaddr(struct in6_addr *ip6_daddr, - bdaddr_t *addr, u8 *addr_type) +static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr) { - u8 *eui64; - - eui64 = ip6_daddr->s6_addr + 8; + u8 *eui64 = ip6_daddr->s6_addr + 8; addr->b[0] = eui64[7]; addr->b[1] = eui64[6]; @@ -433,16 +362,19 @@ static void get_dest_bdaddr(struct in6_addr *ip6_daddr, addr->b[3] = eui64[2]; addr->b[4] = eui64[1]; addr->b[5] = eui64[0]; +} - addr->b[5] ^= 2; +static void convert_dest_bdaddr(struct in6_addr *ip6_daddr, + bdaddr_t *addr, u8 *addr_type) +{ + copy_to_bdaddr(ip6_daddr, addr); - /* Set universal/local bit to 0 */ - if (addr->b[5] & 1) { - addr->b[5] &= ~1; - *addr_type = ADDR_LE_DEV_PUBLIC; - } else { - *addr_type = ADDR_LE_DEV_RANDOM; - } + /* We need to toggle the U/L bit that we got from IPv6 address + * so that we get the proper address and type of the BD address. + */ + addr->b[5] ^= 0x02; + + *addr_type = get_addr_type_from_eui64(addr->b[5]); } static int header_create(struct sk_buff *skb, struct net_device *netdev, @@ -466,16 +398,17 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev, if (ipv6_addr_is_multicast(&hdr->daddr)) { memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, sizeof(struct in6_addr)); - lowpan_cb(skb)->conn = NULL; + lowpan_cb(skb)->chan = NULL; } else { unsigned long flags; /* Get destination BT device from skb. * If there is no such peer then discard the packet. */ - get_dest_bdaddr(&hdr->daddr, &addr, &addr_type); + convert_dest_bdaddr(&hdr->daddr, &addr, &addr_type); - BT_DBG("dest addr %pMR type %d", &addr, addr_type); + BT_DBG("dest addr %pMR type %d IP %pI6c", &addr, + addr_type, &hdr->daddr); read_lock_irqsave(&devices_lock, flags); peer = peer_lookup_ba(dev, &addr, addr_type); @@ -490,7 +423,7 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev, memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, sizeof(struct in6_addr)); - lowpan_cb(skb)->conn = peer->conn; + lowpan_cb(skb)->chan = peer->chan; } saddr = dev->netdev->dev_addr; @@ -499,14 +432,42 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev, } /* Packet to BT LE device */ -static int send_pkt(struct l2cap_conn *conn, const void *saddr, - const void *daddr, struct sk_buff *skb, +static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb, struct net_device *netdev) { - raw_dump_table(__func__, "raw skb data dump before fragmentation", - skb->data, skb->len); + struct msghdr msg; + struct kvec iv; + int err; + + /* Remember the skb so that we can send EAGAIN to the caller if + * we run out of credits. + */ + chan->data = skb; + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = (struct iovec *) &iv; + msg.msg_iovlen = 1; + iv.iov_base = skb->data; + iv.iov_len = skb->len; + + err = l2cap_chan_send(chan, &msg, skb->len); + if (err > 0) { + netdev->stats.tx_bytes += err; + netdev->stats.tx_packets++; + return 0; + } + + if (!err) + err = lowpan_cb(skb)->status; + + if (err < 0) { + if (err == -EAGAIN) + netdev->stats.tx_dropped++; + else + netdev->stats.tx_errors++; + } - return conn_send(conn, skb->data, skb->len, 0, netdev); + return err; } static void send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) @@ -529,8 +490,7 @@ static void send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) list_for_each_entry_safe(pentry, ptmp, &dev->peers, list) { local_skb = skb_clone(skb, GFP_ATOMIC); - send_pkt(pentry->conn, netdev->dev_addr, - pentry->eui64_addr, local_skb, netdev); + send_pkt(pentry->chan, local_skb, netdev); kfree_skb(local_skb); } @@ -542,7 +502,6 @@ static void send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) { int err = 0; - unsigned char *eui64_addr; struct lowpan_dev *dev; struct lowpan_peer *peer; bdaddr_t addr; @@ -556,20 +515,21 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) } else { unsigned long flags; - get_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type); - eui64_addr = lowpan_cb(skb)->addr.s6_addr + 8; + convert_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type); dev = lowpan_dev(netdev); read_lock_irqsave(&devices_lock, flags); peer = peer_lookup_ba(dev, &addr, addr_type); read_unlock_irqrestore(&devices_lock, flags); - BT_DBG("xmit from %s to %pMR (%pI6c) peer %p", netdev->name, - &addr, &lowpan_cb(skb)->addr, peer); + BT_DBG("xmit %s to %pMR type %d IP %pI6c peer %p", + netdev->name, &addr, addr_type, + &lowpan_cb(skb)->addr, peer); - if (peer && peer->conn) - err = send_pkt(peer->conn, netdev->dev_addr, - eui64_addr, skb, netdev); + if (peer && peer->chan) + err = send_pkt(peer->chan, skb, netdev); + else + err = -ENOENT; } dev_kfree_skb(skb); @@ -620,13 +580,13 @@ static void set_addr(u8 *eui, u8 *addr, u8 addr_type) eui[6] = addr[1]; eui[7] = addr[0]; - eui[0] ^= 2; - - /* Universal/local bit set, RFC 4291 */ - if (addr_type == ADDR_LE_DEV_PUBLIC) - eui[0] |= 1; + /* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */ + if (addr_type == BDADDR_LE_PUBLIC) + eui[0] &= ~0x02; else - eui[0] &= ~1; + eui[0] |= 0x02; + + BT_DBG("type %d addr %*phC", addr_type, 8, eui); } static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr, @@ -634,7 +594,6 @@ static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr, { netdev->addr_assign_type = NET_ADDR_PERM; set_addr(netdev->dev_addr, addr->b, addr_type); - netdev->dev_addr[0] ^= 2; } static void ifup(struct net_device *netdev) @@ -648,6 +607,17 @@ static void ifup(struct net_device *netdev) rtnl_unlock(); } +static void ifdown(struct net_device *netdev) +{ + int err; + + rtnl_lock(); + err = dev_close(netdev); + if (err < 0) + BT_INFO("iface %s cannot be closed (%d)", netdev->name, err); + rtnl_unlock(); +} + static void do_notify_peers(struct work_struct *work) { struct lowpan_dev *dev = container_of(work, struct lowpan_dev, @@ -661,36 +631,67 @@ static bool is_bt_6lowpan(struct hci_conn *hcon) if (hcon->type != LE_LINK) return false; - return test_bit(HCI_CONN_6LOWPAN, &hcon->flags); + if (!psm_6lowpan) + return false; + + return true; +} + +static struct l2cap_chan *chan_create(void) +{ + struct l2cap_chan *chan; + + chan = l2cap_chan_create(); + if (!chan) + return NULL; + + l2cap_chan_set_defaults(chan); + + chan->chan_type = L2CAP_CHAN_CONN_ORIENTED; + chan->mode = L2CAP_MODE_LE_FLOWCTL; + chan->omtu = 65535; + chan->imtu = chan->omtu; + + return chan; +} + +static struct l2cap_chan *chan_open(struct l2cap_chan *pchan) +{ + struct l2cap_chan *chan; + + chan = chan_create(); + if (!chan) + return NULL; + + chan->remote_mps = chan->omtu; + chan->mps = chan->omtu; + + chan->state = BT_CONNECTED; + + return chan; } -static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev) +static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, + struct lowpan_dev *dev) { struct lowpan_peer *peer; unsigned long flags; peer = kzalloc(sizeof(*peer), GFP_ATOMIC); if (!peer) - return -ENOMEM; + return NULL; - peer->conn = conn; + peer->chan = chan; memset(&peer->peer_addr, 0, sizeof(struct in6_addr)); /* RFC 2464 ch. 5 */ peer->peer_addr.s6_addr[0] = 0xFE; peer->peer_addr.s6_addr[1] = 0x80; - set_addr((u8 *)&peer->peer_addr.s6_addr + 8, conn->hcon->dst.b, - conn->hcon->dst_type); + set_addr((u8 *)&peer->peer_addr.s6_addr + 8, chan->dst.b, + chan->dst_type); memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8, EUI64_ADDR_LEN); - peer->eui64_addr[0] ^= 2; /* second bit-flip (Universe/Local) - * is done according RFC2464 - */ - - raw_dump_inline(__func__, "peer IPv6 address", - (unsigned char *)&peer->peer_addr, 16); - raw_dump_inline(__func__, "peer EUI64 address", peer->eui64_addr, 8); write_lock_irqsave(&devices_lock, flags); INIT_LIST_HEAD(&peer->list); @@ -701,40 +702,24 @@ static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev) INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers); schedule_delayed_work(&dev->notify_peers, msecs_to_jiffies(100)); - return 0; + return peer->chan; } -/* This gets called when BT LE 6LoWPAN device is connected. We then - * create network device that acts as a proxy between BT LE device - * and kernel network stack. - */ -int bt_6lowpan_add_conn(struct l2cap_conn *conn) +static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) { - struct lowpan_peer *peer = NULL; - struct lowpan_dev *dev; struct net_device *netdev; int err = 0; unsigned long flags; - if (!is_bt_6lowpan(conn->hcon)) - return 0; - - peer = lookup_peer(conn); - if (peer) - return -EEXIST; - - dev = lookup_dev(conn); - if (dev) - return add_peer_conn(conn, dev); - - netdev = alloc_netdev(sizeof(*dev), IFACE_NAME_TEMPLATE, netdev_setup); + netdev = alloc_netdev(sizeof(struct lowpan_dev), IFACE_NAME_TEMPLATE, + NET_NAME_UNKNOWN, netdev_setup); if (!netdev) return -ENOMEM; - set_dev_addr(netdev, &conn->hcon->src, conn->hcon->src_type); + set_dev_addr(netdev, &chan->src, chan->src_type); netdev->netdev_ops = &netdev_ops; - SET_NETDEV_DEV(netdev, &conn->hcon->dev); + SET_NETDEV_DEV(netdev, &chan->conn->hcon->dev); SET_NETDEV_DEVTYPE(netdev, &bt_type); err = register_netdev(netdev); @@ -744,28 +729,61 @@ int bt_6lowpan_add_conn(struct l2cap_conn *conn) goto out; } - BT_DBG("ifindex %d peer bdaddr %pMR my addr %pMR", - netdev->ifindex, &conn->hcon->dst, &conn->hcon->src); + BT_DBG("ifindex %d peer bdaddr %pMR type %d my addr %pMR type %d", + netdev->ifindex, &chan->dst, chan->dst_type, + &chan->src, chan->src_type); set_bit(__LINK_STATE_PRESENT, &netdev->state); - dev = netdev_priv(netdev); - dev->netdev = netdev; - dev->hdev = conn->hcon->hdev; - INIT_LIST_HEAD(&dev->peers); + *dev = netdev_priv(netdev); + (*dev)->netdev = netdev; + (*dev)->hdev = chan->conn->hcon->hdev; + INIT_LIST_HEAD(&(*dev)->peers); write_lock_irqsave(&devices_lock, flags); - INIT_LIST_HEAD(&dev->list); - list_add(&dev->list, &bt_6lowpan_devices); + INIT_LIST_HEAD(&(*dev)->list); + list_add(&(*dev)->list, &bt_6lowpan_devices); write_unlock_irqrestore(&devices_lock, flags); - ifup(netdev); - - return add_peer_conn(conn, dev); + return 0; out: return err; } +static inline void chan_ready_cb(struct l2cap_chan *chan) +{ + struct lowpan_dev *dev; + + dev = lookup_dev(chan->conn); + + BT_DBG("chan %p conn %p dev %p", chan, chan->conn, dev); + + if (!dev) { + if (setup_netdev(chan, &dev) < 0) { + l2cap_chan_del(chan, -ENOENT); + return; + } + } + + if (!try_module_get(THIS_MODULE)) + return; + + add_peer_chan(chan, dev); + ifup(dev->netdev); +} + +static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *chan) +{ + struct l2cap_chan *pchan; + + pchan = chan_open(chan); + pchan->ops = chan->ops; + + BT_DBG("chan %p pchan %p", chan, pchan); + + return pchan; +} + static void delete_netdev(struct work_struct *work) { struct lowpan_dev *entry = container_of(work, struct lowpan_dev, @@ -776,26 +794,43 @@ static void delete_netdev(struct work_struct *work) /* The entry pointer is deleted in device_event() */ } -int bt_6lowpan_del_conn(struct l2cap_conn *conn) +static void chan_close_cb(struct l2cap_chan *chan) { struct lowpan_dev *entry, *tmp; struct lowpan_dev *dev = NULL; struct lowpan_peer *peer; int err = -ENOENT; unsigned long flags; - bool last = false; + bool last = false, removed = true; - if (!conn || !is_bt_6lowpan(conn->hcon)) - return 0; + BT_DBG("chan %p conn %p", chan, chan->conn); + + if (chan->conn && chan->conn->hcon) { + if (!is_bt_6lowpan(chan->conn->hcon)) + return; + + /* If conn is set, then the netdev is also there and we should + * not remove it. + */ + removed = false; + } write_lock_irqsave(&devices_lock, flags); list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { dev = lowpan_dev(entry->netdev); - peer = peer_lookup_conn(dev, conn); + peer = peer_lookup_chan(dev, chan); if (peer) { last = peer_del(dev, peer); err = 0; + + BT_DBG("dev %p removing %speer %p", dev, + last ? "last " : "1 ", peer); + BT_DBG("chan %p orig refcnt %d", chan, + atomic_read(&chan->kref.refcount)); + + l2cap_chan_put(chan); + kfree(peer); break; } } @@ -805,18 +840,402 @@ int bt_6lowpan_del_conn(struct l2cap_conn *conn) cancel_delayed_work_sync(&dev->notify_peers); - /* bt_6lowpan_del_conn() is called with hci dev lock held which - * means that we must delete the netdevice in worker thread. - */ - INIT_WORK(&entry->delete_netdev, delete_netdev); - schedule_work(&entry->delete_netdev); + ifdown(dev->netdev); + + if (!removed) { + INIT_WORK(&entry->delete_netdev, delete_netdev); + schedule_work(&entry->delete_netdev); + } } else { write_unlock_irqrestore(&devices_lock, flags); } + return; +} + +static void chan_state_change_cb(struct l2cap_chan *chan, int state, int err) +{ + BT_DBG("chan %p conn %p state %s err %d", chan, chan->conn, + state_to_string(state), err); +} + +static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan, + unsigned long hdr_len, + unsigned long len, int nb) +{ + /* Note that we must allocate using GFP_ATOMIC here as + * this function is called originally from netdev hard xmit + * function in atomic context. + */ + return bt_skb_alloc(hdr_len + len, GFP_ATOMIC); +} + +static void chan_suspend_cb(struct l2cap_chan *chan) +{ + struct sk_buff *skb = chan->data; + + BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb); + + lowpan_cb(skb)->status = -EAGAIN; +} + +static void chan_resume_cb(struct l2cap_chan *chan) +{ + struct sk_buff *skb = chan->data; + + BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb); + + lowpan_cb(skb)->status = 0; +} + +static long chan_get_sndtimeo_cb(struct l2cap_chan *chan) +{ + return msecs_to_jiffies(1000); +} + +static const struct l2cap_ops bt_6lowpan_chan_ops = { + .name = "L2CAP 6LoWPAN channel", + .new_connection = chan_new_conn_cb, + .recv = chan_recv_cb, + .close = chan_close_cb, + .state_change = chan_state_change_cb, + .ready = chan_ready_cb, + .resume = chan_resume_cb, + .suspend = chan_suspend_cb, + .get_sndtimeo = chan_get_sndtimeo_cb, + .alloc_skb = chan_alloc_skb_cb, + .memcpy_fromiovec = l2cap_chan_no_memcpy_fromiovec, + + .teardown = l2cap_chan_no_teardown, + .defer = l2cap_chan_no_defer, + .set_shutdown = l2cap_chan_no_set_shutdown, +}; + +static inline __u8 bdaddr_type(__u8 type) +{ + if (type == ADDR_LE_DEV_PUBLIC) + return BDADDR_LE_PUBLIC; + else + return BDADDR_LE_RANDOM; +} + +static struct l2cap_chan *chan_get(void) +{ + struct l2cap_chan *pchan; + + pchan = chan_create(); + if (!pchan) + return NULL; + + pchan->ops = &bt_6lowpan_chan_ops; + + return pchan; +} + +static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) +{ + struct l2cap_chan *pchan; + int err; + + pchan = chan_get(); + if (!pchan) + return -EINVAL; + + err = l2cap_chan_connect(pchan, cpu_to_le16(psm_6lowpan), 0, + addr, dst_type); + + BT_DBG("chan %p err %d", pchan, err); + if (err < 0) + l2cap_chan_put(pchan); + return err; } +static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type) +{ + struct lowpan_peer *peer; + + BT_DBG("conn %p dst type %d", conn, dst_type); + + peer = lookup_peer(conn); + if (!peer) + return -ENOENT; + + BT_DBG("peer %p chan %p", peer, peer->chan); + + l2cap_chan_close(peer->chan, ENOENT); + + return 0; +} + +static struct l2cap_chan *bt_6lowpan_listen(void) +{ + bdaddr_t *addr = BDADDR_ANY; + struct l2cap_chan *pchan; + int err; + + if (psm_6lowpan == 0) + return NULL; + + pchan = chan_get(); + if (!pchan) + return NULL; + + pchan->state = BT_LISTEN; + pchan->src_type = BDADDR_LE_PUBLIC; + + BT_DBG("psm 0x%04x chan %p src type %d", psm_6lowpan, pchan, + pchan->src_type); + + err = l2cap_add_psm(pchan, addr, cpu_to_le16(psm_6lowpan)); + if (err) { + l2cap_chan_put(pchan); + BT_ERR("psm cannot be added err %d", err); + return NULL; + } + + return pchan; +} + +static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, + struct l2cap_conn **conn) +{ + struct hci_conn *hcon; + struct hci_dev *hdev; + bdaddr_t *src = BDADDR_ANY; + int n; + + n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", + &addr->b[5], &addr->b[4], &addr->b[3], + &addr->b[2], &addr->b[1], &addr->b[0], + addr_type); + + if (n < 7) + return -EINVAL; + + hdev = hci_get_route(addr, src); + if (!hdev) + return -ENOENT; + + hci_dev_lock(hdev); + hcon = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + hci_dev_unlock(hdev); + + if (!hcon) + return -ENOENT; + + *conn = (struct l2cap_conn *)hcon->l2cap_data; + + BT_DBG("conn %p dst %pMR type %d", *conn, &hcon->dst, hcon->dst_type); + + return 0; +} + +static void disconnect_all_peers(void) +{ + struct lowpan_dev *entry, *tmp_dev; + struct lowpan_peer *peer, *tmp_peer, *new_peer; + struct list_head peers; + unsigned long flags; + + INIT_LIST_HEAD(&peers); + + /* We make a separate list of peers as the close_cb() will + * modify the device peers list so it is better not to mess + * with the same list at the same time. + */ + + read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp_dev, &bt_6lowpan_devices, list) { + list_for_each_entry_safe(peer, tmp_peer, &entry->peers, list) { + new_peer = kmalloc(sizeof(*new_peer), GFP_ATOMIC); + if (!new_peer) + break; + + new_peer->chan = peer->chan; + INIT_LIST_HEAD(&new_peer->list); + + list_add(&new_peer->list, &peers); + } + } + + read_unlock_irqrestore(&devices_lock, flags); + + list_for_each_entry_safe(peer, tmp_peer, &peers, list) { + l2cap_chan_close(peer->chan, ENOENT); + kfree(peer); + } +} + +static int lowpan_psm_set(void *data, u64 val) +{ + u16 psm; + + psm = val; + if (psm == 0 || psm_6lowpan != psm) + /* Disconnect existing connections if 6lowpan is + * disabled (psm = 0), or if psm changes. + */ + disconnect_all_peers(); + + psm_6lowpan = psm; + + if (listen_chan) { + l2cap_chan_close(listen_chan, 0); + l2cap_chan_put(listen_chan); + } + + listen_chan = bt_6lowpan_listen(); + + return 0; +} + +static int lowpan_psm_get(void *data, u64 *val) +{ + *val = psm_6lowpan; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(lowpan_psm_fops, lowpan_psm_get, + lowpan_psm_set, "%llu\n"); + +static ssize_t lowpan_control_write(struct file *fp, + const char __user *user_buffer, + size_t count, + loff_t *position) +{ + char buf[32]; + size_t buf_size = min(count, sizeof(buf) - 1); + int ret; + bdaddr_t addr; + u8 addr_type; + struct l2cap_conn *conn = NULL; + + if (copy_from_user(buf, user_buffer, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + if (memcmp(buf, "connect ", 8) == 0) { + ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn); + if (ret == -EINVAL) + return ret; + + if (listen_chan) { + l2cap_chan_close(listen_chan, 0); + l2cap_chan_put(listen_chan); + listen_chan = NULL; + } + + if (conn) { + struct lowpan_peer *peer; + + if (!is_bt_6lowpan(conn->hcon)) + return -EINVAL; + + peer = lookup_peer(conn); + if (peer) { + BT_DBG("6LoWPAN connection already exists"); + return -EALREADY; + } + + BT_DBG("conn %p dst %pMR type %d user %d", conn, + &conn->hcon->dst, conn->hcon->dst_type, + addr_type); + } + + ret = bt_6lowpan_connect(&addr, addr_type); + if (ret < 0) + return ret; + + return count; + } + + if (memcmp(buf, "disconnect ", 11) == 0) { + ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn); + if (ret < 0) + return ret; + + ret = bt_6lowpan_disconnect(conn, addr_type); + if (ret < 0) + return ret; + + return count; + } + + return count; +} + +static int lowpan_control_show(struct seq_file *f, void *ptr) +{ + struct lowpan_dev *entry, *tmp_dev; + struct lowpan_peer *peer, *tmp_peer; + unsigned long flags; + + read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp_dev, &bt_6lowpan_devices, list) { + list_for_each_entry_safe(peer, tmp_peer, &entry->peers, list) + seq_printf(f, "%pMR (type %u)\n", + &peer->chan->dst, peer->chan->dst_type); + } + + read_unlock_irqrestore(&devices_lock, flags); + + return 0; +} + +static int lowpan_control_open(struct inode *inode, struct file *file) +{ + return single_open(file, lowpan_control_show, inode->i_private); +} + +static const struct file_operations lowpan_control_fops = { + .open = lowpan_control_open, + .read = seq_read, + .write = lowpan_control_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static void disconnect_devices(void) +{ + struct lowpan_dev *entry, *tmp, *new_dev; + struct list_head devices; + unsigned long flags; + + INIT_LIST_HEAD(&devices); + + /* We make a separate list of devices because the unregister_netdev() + * will call device_event() which will also want to modify the same + * devices list. + */ + + read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + new_dev = kmalloc(sizeof(*new_dev), GFP_ATOMIC); + if (!new_dev) + break; + + new_dev->netdev = entry->netdev; + INIT_LIST_HEAD(&new_dev->list); + + list_add(&new_dev->list, &devices); + } + + read_unlock_irqrestore(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &devices, list) { + ifdown(entry->netdev); + BT_DBG("Unregistering netdev %s %p", + entry->netdev->name, entry->netdev); + unregister_netdev(entry->netdev); + kfree(entry); + } +} + static int device_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -833,6 +1252,8 @@ static int device_event(struct notifier_block *unused, list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { if (entry->netdev == netdev) { + BT_DBG("Unregistered netdev %s %p", + netdev->name, netdev); list_del(&entry->list); kfree(entry); break; @@ -849,12 +1270,37 @@ static struct notifier_block bt_6lowpan_dev_notifier = { .notifier_call = device_event, }; -int bt_6lowpan_init(void) +static int __init bt_6lowpan_init(void) { + lowpan_psm_debugfs = debugfs_create_file("6lowpan_psm", 0644, + bt_debugfs, NULL, + &lowpan_psm_fops); + lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644, + bt_debugfs, NULL, + &lowpan_control_fops); + return register_netdevice_notifier(&bt_6lowpan_dev_notifier); } -void bt_6lowpan_cleanup(void) +static void __exit bt_6lowpan_exit(void) { + debugfs_remove(lowpan_psm_debugfs); + debugfs_remove(lowpan_control_debugfs); + + if (listen_chan) { + l2cap_chan_close(listen_chan, 0); + l2cap_chan_put(listen_chan); + } + + disconnect_devices(); + unregister_netdevice_notifier(&bt_6lowpan_dev_notifier); } + +module_init(bt_6lowpan_init); +module_exit(bt_6lowpan_exit); + +MODULE_AUTHOR("Jukka Rissanen <jukka.rissanen@linux.intel.com>"); +MODULE_DESCRIPTION("Bluetooth 6LoWPAN"); +MODULE_VERSION(VERSION); +MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/6lowpan.h b/net/bluetooth/6lowpan.h deleted file mode 100644 index 5d281f1eaf55..000000000000 --- a/net/bluetooth/6lowpan.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - Copyright (c) 2013 Intel Corp. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 and - only version 2 as published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -*/ - -#ifndef __6LOWPAN_H -#define __6LOWPAN_H - -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <net/bluetooth/l2cap.h> - -#if IS_ENABLED(CONFIG_BT_6LOWPAN) -int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb); -int bt_6lowpan_add_conn(struct l2cap_conn *conn); -int bt_6lowpan_del_conn(struct l2cap_conn *conn); -int bt_6lowpan_init(void); -void bt_6lowpan_cleanup(void); -#else -static int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb) -{ - return -EOPNOTSUPP; -} -static int bt_6lowpan_add_conn(struct l2cap_conn *conn) -{ - return -EOPNOTSUPP; -} -int bt_6lowpan_del_conn(struct l2cap_conn *conn) -{ - return -EOPNOTSUPP; -} -static int bt_6lowpan_init(void) -{ - return -EOPNOTSUPP; -} -static void bt_6lowpan_cleanup(void) { } -#endif - -#endif /* __6LOWPAN_H */ diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 06ec14499ca1..600fb29288f4 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -6,7 +6,6 @@ menuconfig BT tristate "Bluetooth subsystem support" depends on NET && !S390 depends on RFKILL || !RFKILL - select 6LOWPAN_IPHC if BT_6LOWPAN select CRC16 select CRYPTO select CRYPTO_BLKCIPHER @@ -41,10 +40,10 @@ menuconfig BT more information, see <http://www.bluez.org/>. config BT_6LOWPAN - bool "Bluetooth 6LoWPAN support" - depends on BT && IPV6 + tristate "Bluetooth 6LoWPAN support" + depends on BT && 6LOWPAN help - IPv6 compression over Bluetooth. + IPv6 compression over Bluetooth Low Energy. source "net/bluetooth/rfcomm/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index ca51246b1016..886e9aa3ecf1 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -7,10 +7,12 @@ obj-$(CONFIG_BT_RFCOMM) += rfcomm/ obj-$(CONFIG_BT_BNEP) += bnep/ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ +obj-$(CONFIG_BT_6LOWPAN) += bluetooth_6lowpan.o + +bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ a2mp.o amp.o -bluetooth-$(CONFIG_BT_6LOWPAN) += 6lowpan.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 9514cc9e850c..5dcade511fdb 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -63,7 +63,7 @@ void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data) msg.msg_iov = (struct iovec *) &iv; msg.msg_iovlen = 1; - l2cap_chan_send(chan, &msg, total_len, 0); + l2cap_chan_send(chan, &msg, total_len); kfree(cmd); } @@ -693,18 +693,19 @@ static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state, } static struct sk_buff *a2mp_chan_alloc_skb_cb(struct l2cap_chan *chan, + unsigned long hdr_len, unsigned long len, int nb) { struct sk_buff *skb; - skb = bt_skb_alloc(len, GFP_KERNEL); + skb = bt_skb_alloc(hdr_len + len, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); return skb; } -static struct l2cap_ops a2mp_chan_ops = { +static const struct l2cap_ops a2mp_chan_ops = { .name = "L2CAP A2MP channel", .recv = a2mp_chan_recv_cb, .close = a2mp_chan_close_cb, @@ -719,6 +720,7 @@ static struct l2cap_ops a2mp_chan_ops = { .resume = l2cap_chan_no_resume, .set_shutdown = l2cap_chan_no_set_shutdown, .get_sndtimeo = l2cap_chan_no_get_sndtimeo, + .memcpy_fromiovec = l2cap_chan_no_memcpy_fromiovec, }; static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 2021c481cdb6..4dca0299ed96 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -639,7 +639,7 @@ static int bt_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations bt_seq_ops = { +static const struct seq_operations bt_seq_ops = { .start = bt_seq_start, .next = bt_seq_next, .stop = bt_seq_stop, diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index bb39509b3f06..016cdb66df6c 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -113,8 +113,9 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr, { bdaddr_t *dst = &mgr->l2cap_conn->hcon->dst; struct hci_conn *hcon; + u8 role = out ? HCI_ROLE_MASTER : HCI_ROLE_SLAVE; - hcon = hci_conn_add(hdev, AMP_LINK, dst); + hcon = hci_conn_add(hdev, AMP_LINK, dst, role); if (!hcon) return NULL; @@ -125,7 +126,6 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr, hcon->handle = __next_handle(mgr); hcon->remote_id = remote_id; hcon->amp_mgr = amp_mgr_get(mgr); - hcon->out = out; return hcon; } @@ -133,8 +133,8 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr, /* AMP crypto key generation interface */ static int hmac_sha256(u8 *key, u8 ksize, char *plaintext, u8 psize, u8 *output) { - int ret = 0; struct crypto_shash *tfm; + int ret; if (!ksize) return -EINVAL; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a841d3e776c5..85bcc21e84d2 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -538,8 +538,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) /* session struct allocated as private part of net_device */ dev = alloc_netdev(sizeof(struct bnep_session), - (*req->device) ? req->device : "bnep%d", - bnep_net_setup); + (*req->device) ? req->device : "bnep%d", + NET_NAME_UNKNOWN, + bnep_net_setup); if (!dev) return -ENOMEM; diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index cd75e4d64b90..1ca8a87a0787 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -362,12 +362,6 @@ void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb) CAPIMSG_SETCONTROL(skb->data, contr); } - if (!ctrl) { - BT_ERR("Can't find controller %d for message", session->num); - kfree_skb(skb); - return; - } - capi_ctr_handle_message(ctrl, appl, skb); } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d958e2dca52f..b50dabb3f86a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -28,6 +28,7 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> #include "smp.h" #include "a2mp.h" @@ -65,8 +66,7 @@ static void hci_acl_create_connection(struct hci_conn *conn) conn->state = BT_CONNECT; conn->out = true; - - conn->link_mode = HCI_LM_MASTER; + conn->role = HCI_ROLE_MASTER; conn->attempt++; @@ -135,7 +135,7 @@ void hci_disconnect(struct hci_conn *conn, __u8 reason) hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp); } -static void hci_amp_disconn(struct hci_conn *conn, __u8 reason) +static void hci_amp_disconn(struct hci_conn *conn) { struct hci_cp_disconn_phy_link cp; @@ -144,7 +144,7 @@ static void hci_amp_disconn(struct hci_conn *conn, __u8 reason) conn->state = BT_DISCONN; cp.phy_handle = HCI_PHY_HANDLE(conn->handle); - cp.reason = reason; + cp.reason = hci_proto_disconn_ind(conn); hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, sizeof(cp), &cp); } @@ -212,14 +212,26 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle) return true; } -void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, - u16 latency, u16 to_multiplier) +u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, + u16 to_multiplier) { - struct hci_cp_le_conn_update cp; struct hci_dev *hdev = conn->hdev; + struct hci_conn_params *params; + struct hci_cp_le_conn_update cp; - memset(&cp, 0, sizeof(cp)); + hci_dev_lock(hdev); + + params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); + if (params) { + params->conn_min_interval = min; + params->conn_max_interval = max; + params->conn_latency = latency; + params->supervision_timeout = to_multiplier; + } + hci_dev_unlock(hdev); + + memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.conn_interval_min = cpu_to_le16(min); cp.conn_interval_max = cpu_to_le16(max); @@ -229,6 +241,11 @@ void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, cp.max_ce_len = cpu_to_le16(0x0000); hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp); + + if (params) + return 0x01; + + return 0x00; } void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, @@ -270,28 +287,24 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status) } } -static void hci_conn_disconnect(struct hci_conn *conn) -{ - __u8 reason = hci_proto_disconn_ind(conn); - - switch (conn->type) { - case AMP_LINK: - hci_amp_disconn(conn, reason); - break; - default: - hci_disconnect(conn, reason); - break; - } -} - static void hci_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, disc_work.work); + int refcnt = atomic_read(&conn->refcnt); BT_DBG("hcon %p state %s", conn, state_to_string(conn->state)); - if (atomic_read(&conn->refcnt)) + WARN_ON(refcnt < 0); + + /* FIXME: It was observed that in pairing failed scenario, refcnt + * drops below 0. Probably this is because l2cap_conn_del calls + * l2cap_chan_del for each channel, and inside l2cap_chan_del conn is + * dropped. After that loop hci_chan_del is called which also drops + * conn. For now make sure that ACL is alive if refcnt is higher then 0, + * otherwise drop it. + */ + if (refcnt > 0) return; switch (conn->state) { @@ -308,7 +321,31 @@ static void hci_conn_timeout(struct work_struct *work) break; case BT_CONFIG: case BT_CONNECTED: - hci_conn_disconnect(conn); + if (conn->type == AMP_LINK) { + hci_amp_disconn(conn); + } else { + __u8 reason = hci_proto_disconn_ind(conn); + + /* When we are master of an established connection + * and it enters the disconnect timeout, then go + * ahead and try to read the current clock offset. + * + * Processing of the result is done within the + * event handling and hci_clock_offset_evt function. + */ + if (conn->type == ACL_LINK && + conn->role == HCI_ROLE_MASTER) { + struct hci_dev *hdev = conn->hdev; + struct hci_cp_read_clock_offset cp; + + cp.handle = cpu_to_le16(conn->handle); + + hci_send_cmd(hdev, HCI_OP_READ_CLOCK_OFFSET, + sizeof(cp), &cp); + } + + hci_disconnect(conn, reason); + } break; default: conn->state = BT_CLOSED; @@ -325,9 +362,6 @@ static void hci_conn_idle(struct work_struct *work) BT_DBG("hcon %p mode %d", conn, conn->mode); - if (test_bit(HCI_RAW, &hdev->flags)) - return; - if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn)) return; @@ -367,19 +401,34 @@ static void le_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, le_conn_timeout.work); + struct hci_dev *hdev = conn->hdev; BT_DBG(""); + /* We could end up here due to having done directed advertising, + * so clean up the state if necessary. This should however only + * happen with broken hardware or if low duty cycle was used + * (which doesn't have a timeout of its own). + */ + if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { + u8 enable = 0x00; + hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), + &enable); + hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); + return; + } + hci_le_create_connection_cancel(conn); } -struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) +struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, + u8 role) { struct hci_conn *conn; BT_DBG("%s dst %pMR", hdev->name, dst); - conn = kzalloc(sizeof(struct hci_conn), GFP_KERNEL); + conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) return NULL; @@ -387,20 +436,30 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) bacpy(&conn->src, &hdev->bdaddr); conn->hdev = hdev; conn->type = type; + conn->role = role; conn->mode = HCI_CM_ACTIVE; conn->state = BT_OPEN; conn->auth_type = HCI_AT_GENERAL_BONDING; conn->io_capability = hdev->io_capability; conn->remote_auth = 0xff; conn->key_type = 0xff; + conn->tx_power = HCI_TX_POWER_INVALID; + conn->max_tx_power = HCI_TX_POWER_INVALID; set_bit(HCI_CONN_POWER_SAVE, &conn->flags); conn->disc_timeout = HCI_DISCONN_TIMEOUT; + if (conn->role == HCI_ROLE_MASTER) + conn->out = true; + switch (type) { case ACL_LINK: conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK; break; + case LE_LINK: + /* conn->src should reflect the local identity address */ + hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + break; case SCO_LINK: if (lmp_esco_capable(hdev)) conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | @@ -498,7 +557,6 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || - test_bit(HCI_RAW, &d->flags) || test_bit(HCI_USER_CHANNEL, &d->dev_flags) || d->dev_type != HCI_BREDR) continue; @@ -545,6 +603,11 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) * favor of connection establishment, we should restart it. */ hci_update_background_scan(hdev); + + /* Re-enable advertising in case this was a failed connection + * attempt as a peripheral. + */ + mgmt_reenable_advertising(hdev); } static void create_le_conn_complete(struct hci_dev *hdev, u8 status) @@ -584,11 +647,6 @@ static void hci_req_add_le_create_conn(struct hci_request *req, if (hci_update_random_address(req, false, &own_addr_type)) return; - /* Save the address type used for this connnection attempt so we able - * to retrieve this information if we need it. - */ - conn->src_type = own_addr_type; - cp.scan_interval = cpu_to_le16(hdev->le_scan_interval); cp.scan_window = cpu_to_le16(hdev->le_scan_window); bacpy(&cp.peer_addr, &conn->dst); @@ -596,7 +654,8 @@ static void hci_req_add_le_create_conn(struct hci_request *req, cp.own_address_type = own_addr_type; cp.conn_interval_min = cpu_to_le16(conn->le_conn_min_interval); cp.conn_interval_max = cpu_to_le16(conn->le_conn_max_interval); - cp.supervision_timeout = cpu_to_le16(0x002a); + cp.conn_latency = cpu_to_le16(conn->le_conn_latency); + cp.supervision_timeout = cpu_to_le16(conn->le_supv_timeout); cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); @@ -605,8 +664,45 @@ static void hci_req_add_le_create_conn(struct hci_request *req, conn->state = BT_CONNECT; } +static void hci_req_directed_advertising(struct hci_request *req, + struct hci_conn *conn) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_param cp; + u8 own_addr_type; + u8 enable; + + /* Clear the HCI_LE_ADV bit temporarily so that the + * hci_update_random_address knows that it's safe to go ahead + * and write a new random address. The flag will be set back on + * as soon as the SET_ADV_ENABLE HCI command completes. + */ + clear_bit(HCI_LE_ADV, &hdev->dev_flags); + + /* Set require_privacy to false so that the remote device has a + * chance of identifying us. + */ + if (hci_update_random_address(req, false, &own_addr_type) < 0) + return; + + memset(&cp, 0, sizeof(cp)); + cp.type = LE_ADV_DIRECT_IND; + cp.own_address_type = own_addr_type; + cp.direct_addr_type = conn->dst_type; + bacpy(&cp.direct_addr, &conn->dst); + cp.channel_map = hdev->le_adv_channel_map; + + hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); + + enable = 0x01; + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); + + conn->state = BT_CONNECT; +} + struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, - u8 dst_type, u8 sec_level, u8 auth_type) + u8 dst_type, u8 sec_level, u16 conn_timeout, + u8 role) { struct hci_conn_params *params; struct hci_conn *conn; @@ -614,9 +710,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, struct hci_request req; int err; - if (test_bit(HCI_ADVERTISING, &hdev->flags)) - return ERR_PTR(-ENOTSUPP); - /* Some devices send ATT messages as soon as the physical link is * established. To be able to handle these ATT messages, the user- * space first establishes the connection and then starts the pairing @@ -629,7 +722,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); if (conn) { conn->pending_sec_level = sec_level; - conn->auth_type = auth_type; goto done; } @@ -659,29 +751,58 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, dst_type = ADDR_LE_DEV_RANDOM; } - conn = hci_conn_add(hdev, LE_LINK, dst); + conn = hci_conn_add(hdev, LE_LINK, dst, role); if (!conn) return ERR_PTR(-ENOMEM); conn->dst_type = dst_type; - - conn->out = true; - conn->link_mode |= HCI_LM_MASTER; conn->sec_level = BT_SECURITY_LOW; conn->pending_sec_level = sec_level; - conn->auth_type = auth_type; + conn->conn_timeout = conn_timeout; + + hci_req_init(&req, hdev); + + /* Disable advertising if we're active. For master role + * connections most controllers will refuse to connect if + * advertising is enabled, and for slave role connections we + * anyway have to disable it in order to start directed + * advertising. + */ + if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) { + u8 enable = 0x00; + hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), + &enable); + } + + /* If requested to connect as slave use directed advertising */ + if (conn->role == HCI_ROLE_SLAVE) { + /* If we're active scanning most controllers are unable + * to initiate advertising. Simply reject the attempt. + */ + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) && + hdev->le_scan_type == LE_SCAN_ACTIVE) { + skb_queue_purge(&req.cmd_q); + hci_conn_del(conn); + return ERR_PTR(-EBUSY); + } + + hci_req_directed_advertising(&req, conn); + goto create_conn; + } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); if (params) { conn->le_conn_min_interval = params->conn_min_interval; conn->le_conn_max_interval = params->conn_max_interval; + conn->le_conn_latency = params->conn_latency; + conn->le_supv_timeout = params->supervision_timeout; } else { conn->le_conn_min_interval = hdev->le_conn_min_interval; conn->le_conn_max_interval = hdev->le_conn_max_interval; + conn->le_conn_latency = hdev->le_conn_latency; + conn->le_supv_timeout = hdev->le_supv_timeout; } - hci_req_init(&req, hdev); - /* If controller is scanning, we stop it since some controllers are * not able to scan and connect at the same time. Also set the * HCI_LE_SCAN_INTERRUPTED flag so that the command complete @@ -695,6 +816,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, hci_req_add_le_create_conn(&req, conn); +create_conn: err = hci_req_run(&req, create_le_conn_complete); if (err) { hci_conn_del(conn); @@ -712,11 +834,11 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *acl; if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { - acl = hci_conn_add(hdev, ACL_LINK, dst); + acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); if (!acl) return ERR_PTR(-ENOMEM); } @@ -745,7 +867,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, sco = hci_conn_hash_lookup_ba(hdev, type, dst); if (!sco) { - sco = hci_conn_add(hdev, type, dst); + sco = hci_conn_add(hdev, type, dst, HCI_ROLE_MASTER); if (!sco) { hci_conn_drop(acl); return ERR_PTR(-ENOMEM); @@ -792,7 +914,8 @@ int hci_conn_check_link_mode(struct hci_conn *conn) return 0; } - if (hci_conn_ssp_enabled(conn) && !(conn->link_mode & HCI_LM_ENCRYPT)) + if (hci_conn_ssp_enabled(conn) && + !test_bit(HCI_CONN_ENCRYPT, &conn->flags)) return 0; return 1; @@ -808,7 +931,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (sec_level > conn->sec_level) conn->pending_sec_level = sec_level; - else if (conn->link_mode & HCI_LM_AUTH) + else if (test_bit(HCI_CONN_AUTH, &conn->flags)) return 1; /* Make sure we preserve an existing MITM requirement*/ @@ -819,14 +942,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_auth_requested cp; - /* encrypt must be pending if auth is also pending */ - set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); - cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); - if (conn->key_type != 0xff) + + /* If we're already encrypted set the REAUTH_PEND flag, + * otherwise set the ENCRYPT_PEND. + */ + if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) set_bit(HCI_CONN_REAUTH_PEND, &conn->flags); + else + set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); } return 0; @@ -847,7 +973,8 @@ static void hci_conn_encrypt(struct hci_conn *conn) } /* Enable security */ -int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) +int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, + bool initiator) { BT_DBG("hcon %p", conn); @@ -864,7 +991,7 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) return 1; /* For other security levels we need the link key. */ - if (!(conn->link_mode & HCI_LM_AUTH)) + if (!test_bit(HCI_CONN_AUTH, &conn->flags)) goto auth; /* An authenticated FIPS approved combination key has sufficient @@ -900,11 +1027,14 @@ auth: if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) return 0; + if (initiator) + set_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags); + if (!hci_conn_auth(conn, sec_level, auth_type)) return 0; encrypt: - if (conn->link_mode & HCI_LM_ENCRYPT) + if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) return 1; hci_conn_encrypt(conn); @@ -951,7 +1081,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { BT_DBG("hcon %p", conn); - if (!role && conn->link_mode & HCI_LM_MASTER) + if (role == conn->role) return 1; if (!test_and_set_bit(HCI_CONN_RSWITCH_PEND, &conn->flags)) { @@ -972,9 +1102,6 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active) BT_DBG("hcon %p mode %d", conn, conn->mode); - if (test_bit(HCI_RAW, &hdev->flags)) - return; - if (conn->mode != HCI_CM_SNIFF) goto timer; @@ -1025,6 +1152,28 @@ void hci_conn_check_pending(struct hci_dev *hdev) hci_dev_unlock(hdev); } +static u32 get_link_mode(struct hci_conn *conn) +{ + u32 link_mode = 0; + + if (conn->role == HCI_ROLE_MASTER) + link_mode |= HCI_LM_MASTER; + + if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) + link_mode |= HCI_LM_ENCRYPT; + + if (test_bit(HCI_CONN_AUTH, &conn->flags)) + link_mode |= HCI_LM_AUTH; + + if (test_bit(HCI_CONN_SECURE, &conn->flags)) + link_mode |= HCI_LM_SECURE; + + if (test_bit(HCI_CONN_FIPS, &conn->flags)) + link_mode |= HCI_LM_FIPS; + + return link_mode; +} + int hci_get_conn_list(void __user *arg) { struct hci_conn *c; @@ -1060,7 +1209,7 @@ int hci_get_conn_list(void __user *arg) (ci + n)->type = c->type; (ci + n)->out = c->out; (ci + n)->state = c->state; - (ci + n)->link_mode = c->link_mode; + (ci + n)->link_mode = get_link_mode(c); if (++n >= req.conn_num) break; } @@ -1096,7 +1245,7 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg) ci.type = conn->type; ci.out = conn->out; ci.state = conn->state; - ci.link_mode = conn->link_mode; + ci.link_mode = get_link_mode(conn); } hci_dev_unlock(hdev); @@ -1133,7 +1282,7 @@ struct hci_chan *hci_chan_create(struct hci_conn *conn) BT_DBG("%s hcon %p", hdev->name, conn); - chan = kzalloc(sizeof(struct hci_chan), GFP_KERNEL); + chan = kzalloc(sizeof(*chan), GFP_KERNEL); if (!chan) return NULL; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 1c6ffaa8902f..c32d361c0cf7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -34,6 +34,8 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> +#include <net/bluetooth/mgmt.h> #include "smp.h" @@ -52,6 +54,15 @@ DEFINE_RWLOCK(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); +/* ----- HCI requests ----- */ + +#define HCI_REQ_DONE 0 +#define HCI_REQ_PEND 1 +#define HCI_REQ_CANCELED 2 + +#define hci_req_lock(d) mutex_lock(&d->req_lock) +#define hci_req_unlock(d) mutex_unlock(&d->req_lock) + /* ---- HCI notifications ---- */ static void hci_notify(struct hci_dev *hdev, int event) @@ -67,7 +78,7 @@ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_DUT_MODE, &hdev->dev_flags) ? 'Y': 'N'; + buf[0] = test_bit(HCI_DUT_MODE, &hdev->dbg_flags) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -93,7 +104,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_DUT_MODE, &hdev->dev_flags)) + if (enable == test_bit(HCI_DUT_MODE, &hdev->dbg_flags)) return -EALREADY; hci_req_lock(hdev); @@ -114,7 +125,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, if (err < 0) return err; - change_bit(HCI_DUT_MODE, &hdev->dev_flags); + change_bit(HCI_DUT_MODE, &hdev->dbg_flags); return count; } @@ -189,6 +200,31 @@ static const struct file_operations blacklist_fops = { .release = single_release, }; +static int whitelist_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->whitelist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int whitelist_open(struct inode *inode, struct file *file) +{ + return single_open(file, whitelist_show, inode->i_private); +} + +static const struct file_operations whitelist_fops = { + .open = whitelist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int uuids_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; @@ -351,62 +387,13 @@ static int auto_accept_delay_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, auto_accept_delay_set, "%llu\n"); -static int ssp_debug_mode_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - struct sk_buff *skb; - __u8 mode; - int err; - - if (val != 0 && val != 1) - return -EINVAL; - - if (!test_bit(HCI_UP, &hdev->flags)) - return -ENETDOWN; - - hci_req_lock(hdev); - mode = val; - skb = __hci_cmd_sync(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(mode), - &mode, HCI_CMD_TIMEOUT); - hci_req_unlock(hdev); - - if (IS_ERR(skb)) - return PTR_ERR(skb); - - err = -bt_to_errno(skb->data[0]); - kfree_skb(skb); - - if (err < 0) - return err; - - hci_dev_lock(hdev); - hdev->ssp_debug_mode = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int ssp_debug_mode_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->ssp_debug_mode; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(ssp_debug_mode_fops, ssp_debug_mode_get, - ssp_debug_mode_set, "%llu\n"); - static ssize_t force_sc_support_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_FORCE_SC, &hdev->dev_flags) ? 'Y': 'N'; + buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -431,10 +418,10 @@ static ssize_t force_sc_support_write(struct file *file, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_FORCE_SC, &hdev->dev_flags)) + if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) return -EALREADY; - change_bit(HCI_FORCE_SC, &hdev->dev_flags); + change_bit(HCI_FORCE_SC, &hdev->dbg_flags); return count; } @@ -579,6 +566,62 @@ static int sniff_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, sniff_max_interval_set, "%llu\n"); +static int conn_info_min_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val > hdev->conn_info_max_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_min_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_min_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, + conn_info_min_age_set, "%llu\n"); + +static int conn_info_max_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val < hdev->conn_info_min_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_max_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_max_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, + conn_info_max_age_set, "%llu\n"); + static int identity_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; @@ -662,7 +705,7 @@ static ssize_t force_static_address_read(struct file *file, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) ? 'Y': 'N'; + buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -687,10 +730,10 @@ static ssize_t force_static_address_write(struct file *file, if (strtobool(buf, &enable)) return -EINVAL; - if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags)) + if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) return -EALREADY; - change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags); + change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); return count; } @@ -843,182 +886,169 @@ static int conn_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, conn_max_interval_set, "%llu\n"); -static int adv_channel_map_set(void *data, u64 val) +static int conn_latency_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x01 || val > 0x07) + if (val > 0x01f3) return -EINVAL; hci_dev_lock(hdev); - hdev->le_adv_channel_map = val; + hdev->le_conn_latency = val; hci_dev_unlock(hdev); return 0; } -static int adv_channel_map_get(void *data, u64 *val) +static int conn_latency_get(void *data, u64 *val) { struct hci_dev *hdev = data; hci_dev_lock(hdev); - *val = hdev->le_adv_channel_map; + *val = hdev->le_conn_latency; hci_dev_unlock(hdev); return 0; } -DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, - adv_channel_map_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, + conn_latency_set, "%llu\n"); -static ssize_t lowpan_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +static int supervision_timeout_set(void *data, u64 val) { - struct hci_dev *hdev = file->private_data; - char buf[3]; + struct hci_dev *hdev = data; - buf[0] = test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags) ? 'Y' : 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); + if (val < 0x000a || val > 0x0c80) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_supv_timeout = val; + hci_dev_unlock(hdev); + + return 0; } -static ssize_t lowpan_write(struct file *fp, const char __user *user_buffer, - size_t count, loff_t *position) +static int supervision_timeout_get(void *data, u64 *val) { - struct hci_dev *hdev = fp->private_data; - bool enable; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); + struct hci_dev *hdev = data; - if (copy_from_user(buf, user_buffer, buf_size)) - return -EFAULT; + hci_dev_lock(hdev); + *val = hdev->le_supv_timeout; + hci_dev_unlock(hdev); - buf[buf_size] = '\0'; + return 0; +} - if (strtobool(buf, &enable) < 0) - return -EINVAL; +DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, + supervision_timeout_set, "%llu\n"); - if (enable == test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags)) - return -EALREADY; +static int adv_channel_map_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; - change_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags); + if (val < 0x01 || val > 0x07) + return -EINVAL; - return count; -} + hci_dev_lock(hdev); + hdev->le_adv_channel_map = val; + hci_dev_unlock(hdev); -static const struct file_operations lowpan_debugfs_fops = { - .open = simple_open, - .read = lowpan_read, - .write = lowpan_write, - .llseek = default_llseek, -}; + return 0; +} -static int le_auto_conn_show(struct seq_file *sf, void *ptr) +static int adv_channel_map_get(void *data, u64 *val) { - struct hci_dev *hdev = sf->private; - struct hci_conn_params *p; + struct hci_dev *hdev = data; hci_dev_lock(hdev); + *val = hdev->le_adv_channel_map; + hci_dev_unlock(hdev); - list_for_each_entry(p, &hdev->le_conn_params, list) { - seq_printf(sf, "%pMR %u %u\n", &p->addr, p->addr_type, - p->auto_connect); - } + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, + adv_channel_map_set, "%llu\n"); + +static int adv_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + return -EINVAL; + hci_dev_lock(hdev); + hdev->le_adv_min_interval = val; hci_dev_unlock(hdev); return 0; } -static int le_auto_conn_open(struct inode *inode, struct file *file) +static int adv_min_interval_get(void *data, u64 *val) { - return single_open(file, le_auto_conn_show, inode->i_private); + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_min_interval; + hci_dev_unlock(hdev); + + return 0; } -static ssize_t le_auto_conn_write(struct file *file, const char __user *data, - size_t count, loff_t *offset) -{ - struct seq_file *sf = file->private_data; - struct hci_dev *hdev = sf->private; - u8 auto_connect = 0; - bdaddr_t addr; - u8 addr_type; - char *buf; - int err = 0; - int n; +DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, + adv_min_interval_set, "%llu\n"); - /* Don't allow partial write */ - if (*offset != 0) - return -EINVAL; +static int adv_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; - if (count < 3) + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) return -EINVAL; - buf = kzalloc(count, GFP_KERNEL); - if (!buf) - return -ENOMEM; + hci_dev_lock(hdev); + hdev->le_adv_max_interval = val; + hci_dev_unlock(hdev); - if (copy_from_user(buf, data, count)) { - err = -EFAULT; - goto done; - } + return 0; +} - if (memcmp(buf, "add", 3) == 0) { - n = sscanf(&buf[4], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu %hhu", - &addr.b[5], &addr.b[4], &addr.b[3], &addr.b[2], - &addr.b[1], &addr.b[0], &addr_type, - &auto_connect); +static int adv_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; - if (n < 7) { - err = -EINVAL; - goto done; - } + hci_dev_lock(hdev); + *val = hdev->le_adv_max_interval; + hci_dev_unlock(hdev); - hci_dev_lock(hdev); - err = hci_conn_params_add(hdev, &addr, addr_type, auto_connect, - hdev->le_conn_min_interval, - hdev->le_conn_max_interval); - hci_dev_unlock(hdev); + return 0; +} - if (err) - goto done; - } else if (memcmp(buf, "del", 3) == 0) { - n = sscanf(&buf[4], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", - &addr.b[5], &addr.b[4], &addr.b[3], &addr.b[2], - &addr.b[1], &addr.b[0], &addr_type); +DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, + adv_max_interval_set, "%llu\n"); - if (n < 7) { - err = -EINVAL; - goto done; - } +static int device_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct hci_conn_params *p; - hci_dev_lock(hdev); - hci_conn_params_del(hdev, &addr, addr_type); - hci_dev_unlock(hdev); - } else if (memcmp(buf, "clr", 3) == 0) { - hci_dev_lock(hdev); - hci_conn_params_clear(hdev); - hci_pend_le_conns_clear(hdev); - hci_update_background_scan(hdev); - hci_dev_unlock(hdev); - } else { - err = -EINVAL; + hci_dev_lock(hdev); + list_for_each_entry(p, &hdev->le_conn_params, list) { + seq_printf(f, "%pMR %u %u\n", &p->addr, p->addr_type, + p->auto_connect); } + hci_dev_unlock(hdev); -done: - kfree(buf); + return 0; +} - if (err) - return err; - else - return count; +static int device_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, device_list_show, inode->i_private); } -static const struct file_operations le_auto_conn_fops = { - .open = le_auto_conn_open, +static const struct file_operations device_list_fops = { + .open = device_list_open, .read = seq_read, - .write = le_auto_conn_write, .llseek = seq_lseek, .release = single_release, }; @@ -1374,9 +1404,6 @@ static void le_setup(struct hci_request *req) /* Read LE Supported States */ hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); - /* Read LE Advertising Channel TX Power */ - hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); - /* Read LE White List Size */ hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); @@ -1451,14 +1478,17 @@ static void hci_setup_event_mask(struct hci_request *req) /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); events[0] |= 0x10; /* Disconnection Complete */ - events[0] |= 0x80; /* Encryption Change */ events[1] |= 0x08; /* Read Remote Version Information Complete */ events[1] |= 0x20; /* Command Complete */ events[1] |= 0x40; /* Command Status */ events[1] |= 0x80; /* Hardware Error */ events[2] |= 0x04; /* Number of Completed Packets */ events[3] |= 0x02; /* Data Buffer Overflow */ - events[5] |= 0x80; /* Encryption Key Refresh Complete */ + + if (hdev->le_features[0] & HCI_LE_ENCRYPTION) { + events[0] |= 0x80; /* Encryption Change */ + events[5] |= 0x80; /* Encryption Key Refresh Complete */ + } } if (lmp_inq_rssi_capable(hdev)) @@ -1497,13 +1527,6 @@ static void hci_setup_event_mask(struct hci_request *req) events[7] |= 0x20; /* LE Meta-Event */ hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); - - if (lmp_le_capable(hdev)) { - memset(events, 0, sizeof(events)); - events[0] = 0x1f; - hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, - sizeof(events), events); - } } static void hci_init2_req(struct hci_request *req, unsigned long opt) @@ -1518,8 +1541,6 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (lmp_le_capable(hdev)) le_setup(req); - hci_setup_event_mask(req); - /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read * local supported commands HCI command. */ @@ -1602,7 +1623,7 @@ static void hci_set_le_support(struct hci_request *req) if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { cp.le = 0x01; - cp.simul = lmp_le_br_capable(hdev); + cp.simul = 0x00; } if (cp.le != lmp_host_le_capable(hdev)) @@ -1636,7 +1657,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) } /* Enable Authenticated Payload Timeout Expired event if supported */ - if (lmp_ping_capable(hdev)) + if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) events[2] |= 0x80; hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events); @@ -1647,6 +1668,8 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) struct hci_dev *hdev = req->hdev; u8 p; + hci_setup_event_mask(req); + /* Some Broadcom based Bluetooth controllers do not support the * Delete Stored Link Key command. They are clearly indicating its * absence in the bit mask of supported commands. @@ -1673,8 +1696,33 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[5] & 0x10) hci_setup_link_policy(req); - if (lmp_le_capable(hdev)) + if (lmp_le_capable(hdev)) { + u8 events[8]; + + memset(events, 0, sizeof(events)); + events[0] = 0x0f; + + if (hdev->le_features[0] & HCI_LE_ENCRYPTION) + events[0] |= 0x10; /* LE Long Term Key Request */ + + /* If controller supports the Connection Parameters Request + * Link Layer Procedure, enable the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_CONN_PARAM_REQ_PROC) + events[0] |= 0x20; /* LE Remote Connection + * Parameter Request + */ + + hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events), + events); + + if (hdev->commands[25] & 0x40) { + /* Read LE Advertising Channel TX Power */ + hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + } + hci_set_le_support(req); + } /* Read features beyond page 1 if available */ for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { @@ -1694,13 +1742,21 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) if (hdev->commands[22] & 0x04) hci_set_event_mask_page_2(req); + /* Read local codec list if the HCI command is supported */ + if (hdev->commands[29] & 0x20) + hci_req_add(req, HCI_OP_READ_LOCAL_CODECS, 0, NULL); + + /* Get MWS transport configuration if the HCI command is supported */ + if (hdev->commands[30] & 0x08) + hci_req_add(req, HCI_OP_GET_MWS_TRANSPORT_CONFIG, 0, NULL); + /* Check for Synchronization Train support */ if (lmp_sync_train_capable(hdev)) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); /* Enable Secure Connections if supported and configured */ if ((lmp_sc_capable(hdev) || - test_bit(HCI_FORCE_SC, &hdev->dev_flags)) && + test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) && test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) { u8 support = 0x01; hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, @@ -1757,8 +1813,15 @@ static int __hci_init(struct hci_dev *hdev) debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, &blacklist_fops); + debugfs_create_file("whitelist", 0444, hdev->debugfs, hdev, + &whitelist_fops); debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, + &conn_info_min_age_fops); + debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, + &conn_info_max_age_fops); + if (lmp_bredr_capable(hdev)) { debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, &inquiry_cache_fops); @@ -1773,8 +1836,6 @@ static int __hci_init(struct hci_dev *hdev) if (lmp_ssp_capable(hdev)) { debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, hdev, &auto_accept_delay_fops); - debugfs_create_file("ssp_debug_mode", 0644, hdev->debugfs, - hdev, &ssp_debug_mode_fops); debugfs_create_file("force_sc_support", 0644, hdev->debugfs, hdev, &force_sc_support_fops); debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, @@ -1822,17 +1883,58 @@ static int __hci_init(struct hci_dev *hdev) hdev, &conn_min_interval_fops); debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, hdev, &conn_max_interval_fops); + debugfs_create_file("conn_latency", 0644, hdev->debugfs, + hdev, &conn_latency_fops); + debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, + hdev, &supervision_timeout_fops); debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, hdev, &adv_channel_map_fops); - debugfs_create_file("6lowpan", 0644, hdev->debugfs, hdev, - &lowpan_debugfs_fops); - debugfs_create_file("le_auto_conn", 0644, hdev->debugfs, hdev, - &le_auto_conn_fops); + debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, + hdev, &adv_min_interval_fops); + debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, + hdev, &adv_max_interval_fops); + debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, + &device_list_fops); + debugfs_create_u16("discov_interleaved_timeout", 0644, + hdev->debugfs, + &hdev->discov_interleaved_timeout); } return 0; } +static void hci_init0_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + BT_DBG("%s %ld", hdev->name, opt); + + /* Reset */ + if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) + hci_reset_req(req, 0); + + /* Read Local Version */ + hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + + /* Read BD Address */ + if (hdev->set_bdaddr) + hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); +} + +static int __hci_unconf_init(struct hci_dev *hdev) +{ + int err; + + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + return 0; + + err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT); + if (err < 0) + return err; + + return 0; +} + static void hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; @@ -1913,16 +2015,20 @@ bool hci_discovery_active(struct hci_dev *hdev) void hci_discovery_set_state(struct hci_dev *hdev, int state) { + int old_state = hdev->discovery.state; + BT_DBG("%s state %u -> %u", hdev->name, hdev->discovery.state, state); - if (hdev->discovery.state == state) + if (old_state == state) return; + hdev->discovery.state = state; + switch (state) { case DISCOVERY_STOPPED: hci_update_background_scan(hdev); - if (hdev->discovery.state != DISCOVERY_STARTING) + if (old_state != DISCOVERY_STARTING) mgmt_discovering(hdev, 0); break; case DISCOVERY_STARTING: @@ -1935,8 +2041,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) case DISCOVERY_STOPPING: break; } - - hdev->discovery.state = state; } void hci_inquiry_cache_flush(struct hci_dev *hdev) @@ -2023,23 +2127,24 @@ void hci_inquiry_cache_update_resolve(struct hci_dev *hdev, list_add(&ie->list, pos); } -bool hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data, - bool name_known, bool *ssp) +u32 hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data, + bool name_known) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *ie; + u32 flags = 0; BT_DBG("cache %p, %pMR", cache, &data->bdaddr); hci_remove_remote_oob_data(hdev, &data->bdaddr); - if (ssp) - *ssp = data->ssp_mode; + if (!data->ssp_mode) + flags |= MGMT_DEV_FOUND_LEGACY_PAIRING; ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr); if (ie) { - if (ie->data.ssp_mode && ssp) - *ssp = true; + if (!ie->data.ssp_mode) + flags |= MGMT_DEV_FOUND_LEGACY_PAIRING; if (ie->name_state == NAME_NEEDED && data->rssi != ie->data.rssi) { @@ -2051,9 +2156,11 @@ bool hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data, } /* Entry not in the cache. Add new one. */ - ie = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC); - if (!ie) - return false; + ie = kzalloc(sizeof(*ie), GFP_KERNEL); + if (!ie) { + flags |= MGMT_DEV_FOUND_CONFIRM_NAME; + goto done; + } list_add(&ie->all, &cache->all); @@ -2076,9 +2183,10 @@ update: cache->timestamp = jiffies; if (ie->name_state == NAME_NOT_KNOWN) - return false; + flags |= MGMT_DEV_FOUND_CONFIRM_NAME; - return true; +done: + return flags; } static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) @@ -2127,12 +2235,6 @@ static void hci_inq_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); } -static int wait_inquiry(void *word) -{ - schedule(); - return signal_pending(current); -} - int hci_inquiry(void __user *arg) { __u8 __user *ptr = arg; @@ -2154,6 +2256,11 @@ int hci_inquiry(void __user *arg) goto done; } + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + if (hdev->dev_type != HCI_BREDR) { err = -EOPNOTSUPP; goto done; @@ -2183,7 +2290,7 @@ int hci_inquiry(void __user *arg) /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is * cleared). If it is interrupted by a signal, return -EINTR. */ - if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry, + if (wait_on_bit(&hdev->flags, HCI_INQUIRY, TASK_INTERRUPTIBLE)) return -EINTR; } @@ -2236,7 +2343,8 @@ static int hci_dev_do_open(struct hci_dev *hdev) goto done; } - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_CONFIG, &hdev->dev_flags)) { /* Check for rfkill but allow the HCI setup stage to * proceed (which in itself doesn't cause any RF activity). */ @@ -2279,14 +2387,47 @@ static int hci_dev_do_open(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); - if (hdev->setup && test_bit(HCI_SETUP, &hdev->dev_flags)) - ret = hdev->setup(hdev); + if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (hdev->setup) + ret = hdev->setup(hdev); - if (!ret) { - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) - set_bit(HCI_RAW, &hdev->flags); + /* The transport driver can set these quirks before + * creating the HCI device or in its setup callback. + * + * In case any of them is set, the controller has to + * start up as unconfigured. + */ + if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || + test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks)) + set_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + + /* For an unconfigured controller it is required to + * read at least the version information provided by + * the Read Local Version Information command. + * + * If the set_bdaddr driver callback is provided, then + * also the original Bluetooth public device address + * will be read using the Read BD Address command. + */ + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + ret = __hci_unconf_init(hdev); + } - if (!test_bit(HCI_RAW, &hdev->flags) && + if (test_bit(HCI_CONFIG, &hdev->dev_flags)) { + /* If public address change is configured, ensure that + * the address gets programmed. If the driver does not + * support changing the public address, fail the power + * on procedure. + */ + if (bacmp(&hdev->public_addr, BDADDR_ANY) && + hdev->set_bdaddr) + ret = hdev->set_bdaddr(hdev, &hdev->public_addr); + else + ret = -EADDRNOTAVAIL; + } + + if (!ret) { + if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) ret = __hci_init(hdev); } @@ -2299,6 +2440,8 @@ static int hci_dev_do_open(struct hci_dev *hdev) set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_CONFIG, &hdev->dev_flags) && + !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && hdev->dev_type == HCI_BREDR) { hci_dev_lock(hdev); @@ -2323,7 +2466,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) } hdev->close(hdev); - hdev->flags = 0; + hdev->flags &= BIT(HCI_RAW); } done: @@ -2342,6 +2485,21 @@ int hci_dev_open(__u16 dev) if (!hdev) return -ENODEV; + /* Devices that are marked as unconfigured can only be powered + * up as user channel. Trying to bring them up as normal devices + * will result into a failure. Only user channel operation is + * possible. + * + * When this function is called for a user channel, the flag + * HCI_USER_CHANNEL will be set first before attempting to + * open the device. + */ + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && + !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + /* We need to ensure that no other power on/off work is pending * before proceeding to call hci_dev_do_open. This is * particularly important if the setup procedure has not yet @@ -2356,13 +2514,34 @@ int hci_dev_open(__u16 dev) */ flush_workqueue(hdev->req_workqueue); + /* For controllers not using the management interface and that + * are brought up using legacy ioctl, set the HCI_BONDABLE bit + * so that pairing works for them. Once the management interface + * is in use this bit will be cleared again and userspace has + * to explicitly enable it. + */ + if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && + !test_bit(HCI_MGMT, &hdev->dev_flags)) + set_bit(HCI_BONDABLE, &hdev->dev_flags); + err = hci_dev_do_open(hdev); +done: hci_dev_put(hdev); - return err; } +/* This function requires the caller holds hdev->lock */ +static void hci_pend_le_actions_clear(struct hci_dev *hdev) +{ + struct hci_conn_params *p; + + list_for_each_entry(p, &hdev->le_conn_params, list) + list_del_init(&p->action); + + BT_DBG("All LE pending actions cleared"); +} + static int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); @@ -2373,7 +2552,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_req_lock(hdev); if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { - del_timer_sync(&hdev->cmd_timer); + cancel_delayed_work_sync(&hdev->cmd_timer); hci_req_unlock(hdev); return 0; } @@ -2400,7 +2579,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_lock(hdev); hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); - hci_pend_le_conns_clear(hdev); + hci_pend_le_actions_clear(hdev); hci_dev_unlock(hdev); hci_notify(hdev, HCI_DEV_DOWN); @@ -2411,8 +2590,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); - if (!test_bit(HCI_RAW, &hdev->flags) && - !test_bit(HCI_AUTO_OFF, &hdev->dev_flags) && + if (!test_bit(HCI_AUTO_OFF, &hdev->dev_flags) && + !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); @@ -2429,7 +2608,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Drop last sent command */ if (hdev->sent_cmd) { - del_timer_sync(&hdev->cmd_timer); + cancel_delayed_work_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } @@ -2442,7 +2621,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) hdev->close(hdev); /* Clear flags */ - hdev->flags = 0; + hdev->flags &= BIT(HCI_RAW); hdev->dev_flags &= ~HCI_PERSISTENT_MASK; if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { @@ -2511,6 +2690,11 @@ int hci_dev_reset(__u16 dev) goto done; } + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + ret = -EOPNOTSUPP; + goto done; + } + /* Drop queues */ skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); @@ -2526,8 +2710,7 @@ int hci_dev_reset(__u16 dev) atomic_set(&hdev->cmd_cnt, 1); hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; - if (!test_bit(HCI_RAW, &hdev->flags)) - ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); done: hci_req_unlock(hdev); @@ -2549,6 +2732,11 @@ int hci_dev_reset_stat(__u16 dev) goto done; } + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + ret = -EOPNOTSUPP; + goto done; + } + memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); done: @@ -2556,6 +2744,42 @@ done: return ret; } +static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) +{ + bool conn_changed, discov_changed; + + BT_DBG("%s scan 0x%02x", hdev->name, scan); + + if ((scan & SCAN_PAGE)) + conn_changed = !test_and_set_bit(HCI_CONNECTABLE, + &hdev->dev_flags); + else + conn_changed = test_and_clear_bit(HCI_CONNECTABLE, + &hdev->dev_flags); + + if ((scan & SCAN_INQUIRY)) { + discov_changed = !test_and_set_bit(HCI_DISCOVERABLE, + &hdev->dev_flags); + } else { + clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); + discov_changed = test_and_clear_bit(HCI_DISCOVERABLE, + &hdev->dev_flags); + } + + if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + return; + + if (conn_changed || discov_changed) { + /* In case this was disabled through mgmt */ + set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + mgmt_update_adv_data(hdev); + + mgmt_new_settings(hdev); + } +} + int hci_dev_cmd(unsigned int cmd, void __user *arg) { struct hci_dev *hdev; @@ -2574,6 +2798,11 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) goto done; } + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + if (hdev->dev_type != HCI_BREDR) { err = -EOPNOTSUPP; goto done; @@ -2611,6 +2840,12 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) case HCISETSCAN: err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, HCI_INIT_TIMEOUT); + + /* Ensure that the connectable and discoverable states + * get correctly modified as this was a non-mgmt change. + */ + if (!err) + hci_update_scan_state(hdev, dr.dev_opt); break; case HCISETLINKPOL: @@ -2671,14 +2906,17 @@ int hci_get_dev_list(void __user *arg) read_lock(&hci_dev_list_lock); list_for_each_entry(hdev, &hci_dev_list, list) { - if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) - cancel_delayed_work(&hdev->power_off); + unsigned long flags = hdev->flags; - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) - set_bit(HCI_PAIRABLE, &hdev->dev_flags); + /* When the auto-off is configured it means the transport + * is running, but in that case still indicate that the + * device is actually down. + */ + if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + flags &= ~BIT(HCI_UP); (dr + n)->dev_id = hdev->id; - (dr + n)->dev_opt = hdev->flags; + (dr + n)->dev_opt = flags; if (++n >= dev_num) break; @@ -2698,6 +2936,7 @@ int hci_get_dev_info(void __user *arg) { struct hci_dev *hdev; struct hci_dev_info di; + unsigned long flags; int err = 0; if (copy_from_user(&di, arg, sizeof(di))) @@ -2707,16 +2946,19 @@ int hci_get_dev_info(void __user *arg) if (!hdev) return -ENODEV; - if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) - cancel_delayed_work_sync(&hdev->power_off); - - if (!test_bit(HCI_MGMT, &hdev->dev_flags)) - set_bit(HCI_PAIRABLE, &hdev->dev_flags); + /* When the auto-off is configured it means the transport + * is running, but in that case still indicate that the + * device is actually down. + */ + if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + flags = hdev->flags & ~BIT(HCI_UP); + else + flags = hdev->flags; strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); - di.flags = hdev->flags; + di.flags = flags; di.pkt_type = hdev->pkt_type; if (lmp_bredr_capable(hdev)) { di.acl_mtu = hdev->acl_mtu; @@ -2756,7 +2998,8 @@ static int hci_rfkill_set_block(void *data, bool blocked) if (blocked) { set_bit(HCI_RFKILLED, &hdev->dev_flags); - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_CONFIG, &hdev->dev_flags)) hci_dev_do_close(hdev); } else { clear_bit(HCI_RFKILLED, &hdev->dev_flags); @@ -2787,6 +3030,7 @@ static void hci_power_on(struct work_struct *work) * valid, it is important to turn the device back off. */ if (test_bit(HCI_RFKILLED, &hdev->dev_flags) || + test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) || (hdev->dev_type == HCI_BREDR && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { @@ -2797,8 +3041,34 @@ static void hci_power_on(struct work_struct *work) HCI_AUTO_OFF_TIMEOUT); } - if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) + if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) { + /* For unconfigured devices, set the HCI_RAW flag + * so that userspace can easily identify them. + */ + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + set_bit(HCI_RAW, &hdev->flags); + + /* For fully configured devices, this will send + * the Index Added event. For unconfigured devices, + * it will send Unconfigued Index Added event. + * + * Devices with HCI_QUIRK_RAW_DEVICE are ignored + * and no event will be send. + */ + mgmt_index_added(hdev); + } else if (test_and_clear_bit(HCI_CONFIG, &hdev->dev_flags)) { + /* When the controller is now configured, then it + * is important to clear the HCI_RAW flag. + */ + if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + clear_bit(HCI_RAW, &hdev->flags); + + /* Powering on the controller with HCI_CONFIG set only + * happens with the transition from unconfigured to + * configured. This will send the Index Added event. + */ mgmt_index_added(hdev); + } } static void hci_power_off(struct work_struct *work) @@ -2913,16 +3183,16 @@ static bool hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn, return false; } -static bool ltk_type_master(u8 type) +static u8 ltk_role(u8 type) { - if (type == HCI_SMP_STK || type == HCI_SMP_LTK) - return true; + if (type == SMP_LTK) + return HCI_ROLE_MASTER; - return false; + return HCI_ROLE_SLAVE; } struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, __le64 rand, - bool master) + u8 role) { struct smp_ltk *k; @@ -2930,7 +3200,7 @@ struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, __le64 rand, if (k->ediv != ediv || k->rand != rand) continue; - if (ltk_type_master(k->type) != master) + if (ltk_role(k->type) != role) continue; return k; @@ -2940,14 +3210,14 @@ struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, __le64 rand, } struct smp_ltk *hci_find_ltk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr, - u8 addr_type, bool master) + u8 addr_type, u8 role) { struct smp_ltk *k; list_for_each_entry(k, &hdev->long_term_keys, list) if (addr_type == k->bdaddr_type && bacmp(bdaddr, &k->bdaddr) == 0 && - ltk_type_master(k->type) == master) + ltk_role(k->type) == role) return k; return NULL; @@ -2990,12 +3260,12 @@ struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr, return NULL; } -int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, - bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len) +struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, + bdaddr_t *bdaddr, u8 *val, u8 type, + u8 pin_len, bool *persistent) { struct link_key *key, *old_key; u8 old_key_type; - bool persistent; old_key = hci_find_link_key(hdev, bdaddr); if (old_key) { @@ -3005,7 +3275,7 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, old_key_type = conn ? conn->key_type : 0xff; key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) - return -ENOMEM; + return NULL; list_add(&key->list, &hdev->link_keys); } @@ -3030,17 +3300,11 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, else key->type = type; - if (!new_key) - return 0; - - persistent = hci_persistent_key(hdev, conn, type, old_key_type); - - mgmt_new_link_key(hdev, key, persistent); + if (persistent) + *persistent = hci_persistent_key(hdev, conn, type, + old_key_type); - if (conn) - conn->flush_key = !persistent; - - return 0; + return key; } struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, @@ -3048,9 +3312,9 @@ struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 tk[16], u8 enc_size, __le16 ediv, __le64 rand) { struct smp_ltk *key, *old_key; - bool master = ltk_type_master(type); + u8 role = ltk_role(type); - old_key = hci_find_ltk_by_addr(hdev, bdaddr, addr_type, master); + old_key = hci_find_ltk_by_addr(hdev, bdaddr, addr_type, role); if (old_key) key = old_key; else { @@ -3146,9 +3410,10 @@ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) } /* HCI command timer function */ -static void hci_cmd_timeout(unsigned long arg) +static void hci_cmd_timeout(struct work_struct *work) { - struct hci_dev *hdev = (void *) arg; + struct hci_dev *hdev = container_of(work, struct hci_dev, + cmd_timer.work); if (hdev->sent_cmd) { struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; @@ -3254,12 +3519,12 @@ int hci_add_remote_oob_ext_data(struct hci_dev *hdev, bdaddr_t *bdaddr, return 0; } -struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, +struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list *b; - list_for_each_entry(b, &hdev->blacklist, list) { + list_for_each_entry(b, bdaddr_list, list) { if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type) return b; } @@ -3267,11 +3532,11 @@ struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, return NULL; } -static void hci_blacklist_clear(struct hci_dev *hdev) +void hci_bdaddr_list_clear(struct list_head *bdaddr_list) { struct list_head *p, *n; - list_for_each_safe(p, n, &hdev->blacklist) { + list_for_each_safe(p, n, bdaddr_list) { struct bdaddr_list *b = list_entry(p, struct bdaddr_list, list); list_del(p); @@ -3279,99 +3544,38 @@ static void hci_blacklist_clear(struct hci_dev *hdev) } } -int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) +int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list *entry; if (!bacmp(bdaddr, BDADDR_ANY)) return -EBADF; - if (hci_blacklist_lookup(hdev, bdaddr, type)) + if (hci_bdaddr_list_lookup(list, bdaddr, type)) return -EEXIST; - entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; bacpy(&entry->bdaddr, bdaddr); entry->bdaddr_type = type; - list_add(&entry->list, &hdev->blacklist); + list_add(&entry->list, list); - return mgmt_device_blocked(hdev, bdaddr, type); + return 0; } -int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) +int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list *entry; if (!bacmp(bdaddr, BDADDR_ANY)) { - hci_blacklist_clear(hdev); + hci_bdaddr_list_clear(list); return 0; } - entry = hci_blacklist_lookup(hdev, bdaddr, type); - if (!entry) - return -ENOENT; - - list_del(&entry->list); - kfree(entry); - - return mgmt_device_unblocked(hdev, bdaddr, type); -} - -struct bdaddr_list *hci_white_list_lookup(struct hci_dev *hdev, - bdaddr_t *bdaddr, u8 type) -{ - struct bdaddr_list *b; - - list_for_each_entry(b, &hdev->le_white_list, list) { - if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type) - return b; - } - - return NULL; -} - -void hci_white_list_clear(struct hci_dev *hdev) -{ - struct list_head *p, *n; - - list_for_each_safe(p, n, &hdev->le_white_list) { - struct bdaddr_list *b = list_entry(p, struct bdaddr_list, list); - - list_del(p); - kfree(b); - } -} - -int hci_white_list_add(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) -{ - struct bdaddr_list *entry; - - if (!bacmp(bdaddr, BDADDR_ANY)) - return -EBADF; - - entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - bacpy(&entry->bdaddr, bdaddr); - entry->bdaddr_type = type; - - list_add(&entry->list, &hdev->le_white_list); - - return 0; -} - -int hci_white_list_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) -{ - struct bdaddr_list *entry; - - if (!bacmp(bdaddr, BDADDR_ANY)) - return -EBADF; - - entry = hci_white_list_lookup(hdev, bdaddr, type); + entry = hci_bdaddr_list_lookup(list, bdaddr, type); if (!entry) return -ENOENT; @@ -3387,6 +3591,10 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, { struct hci_conn_params *params; + /* The conn params list only contains identity addresses */ + if (!hci_is_identity_address(addr, addr_type)) + return NULL; + list_for_each_entry(params, &hdev->le_conn_params, list) { if (bacmp(¶ms->addr, addr) == 0 && params->addr_type == addr_type) { @@ -3414,62 +3622,98 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) return true; } -static bool is_identity_address(bdaddr_t *addr, u8 addr_type) +/* This function requires the caller holds hdev->lock */ +struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, + bdaddr_t *addr, u8 addr_type) { - if (addr_type == ADDR_LE_DEV_PUBLIC) - return true; + struct hci_conn_params *param; - /* Check for Random Static address type */ - if ((addr->b[5] & 0xc0) == 0xc0) - return true; + /* The list only contains identity addresses */ + if (!hci_is_identity_address(addr, addr_type)) + return NULL; - return false; + list_for_each_entry(param, list, action) { + if (bacmp(¶m->addr, addr) == 0 && + param->addr_type == addr_type) + return param; + } + + return NULL; } /* This function requires the caller holds hdev->lock */ -int hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, - u8 auto_connect, u16 conn_min_interval, - u16 conn_max_interval) +struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, + bdaddr_t *addr, u8 addr_type) { struct hci_conn_params *params; - if (!is_identity_address(addr, addr_type)) - return -EINVAL; + if (!hci_is_identity_address(addr, addr_type)) + return NULL; params = hci_conn_params_lookup(hdev, addr, addr_type); if (params) - goto update; + return params; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) { BT_ERR("Out of memory"); - return -ENOMEM; + return NULL; } bacpy(¶ms->addr, addr); params->addr_type = addr_type; list_add(¶ms->list, &hdev->le_conn_params); + INIT_LIST_HEAD(¶ms->action); -update: - params->conn_min_interval = conn_min_interval; - params->conn_max_interval = conn_max_interval; - params->auto_connect = auto_connect; + params->conn_min_interval = hdev->le_conn_min_interval; + params->conn_max_interval = hdev->le_conn_max_interval; + params->conn_latency = hdev->le_conn_latency; + params->supervision_timeout = hdev->le_supv_timeout; + params->auto_connect = HCI_AUTO_CONN_DISABLED; + + BT_DBG("addr %pMR (type %u)", addr, addr_type); + + return params; +} + +/* This function requires the caller holds hdev->lock */ +int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, + u8 auto_connect) +{ + struct hci_conn_params *params; + + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -EIO; + + if (params->auto_connect == auto_connect) + return 0; + + list_del_init(¶ms->action); switch (auto_connect) { case HCI_AUTO_CONN_DISABLED: case HCI_AUTO_CONN_LINK_LOSS: - hci_pend_le_conn_del(hdev, addr, addr_type); + hci_update_background_scan(hdev); + break; + case HCI_AUTO_CONN_REPORT: + list_add(¶ms->action, &hdev->pend_le_reports); + hci_update_background_scan(hdev); break; + case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: - if (!is_connected(hdev, addr, addr_type)) - hci_pend_le_conn_add(hdev, addr, addr_type); + if (!is_connected(hdev, addr, addr_type)) { + list_add(¶ms->action, &hdev->pend_le_conns); + hci_update_background_scan(hdev); + } break; } - BT_DBG("addr %pMR (type %u) auto_connect %u conn_min_interval 0x%.4x " - "conn_max_interval 0x%.4x", addr, addr_type, auto_connect, - conn_min_interval, conn_max_interval); + params->auto_connect = auto_connect; + + BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, + auto_connect); return 0; } @@ -3483,97 +3727,44 @@ void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) if (!params) return; - hci_pend_le_conn_del(hdev, addr, addr_type); - + list_del(¶ms->action); list_del(¶ms->list); kfree(params); + hci_update_background_scan(hdev); + BT_DBG("addr %pMR (type %u)", addr, addr_type); } /* This function requires the caller holds hdev->lock */ -void hci_conn_params_clear(struct hci_dev *hdev) +void hci_conn_params_clear_disabled(struct hci_dev *hdev) { struct hci_conn_params *params, *tmp; list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) { + if (params->auto_connect != HCI_AUTO_CONN_DISABLED) + continue; list_del(¶ms->list); kfree(params); } - BT_DBG("All LE connection parameters were removed"); -} - -/* This function requires the caller holds hdev->lock */ -struct bdaddr_list *hci_pend_le_conn_lookup(struct hci_dev *hdev, - bdaddr_t *addr, u8 addr_type) -{ - struct bdaddr_list *entry; - - list_for_each_entry(entry, &hdev->pend_le_conns, list) { - if (bacmp(&entry->bdaddr, addr) == 0 && - entry->bdaddr_type == addr_type) - return entry; - } - - return NULL; + BT_DBG("All LE disabled connection parameters were removed"); } /* This function requires the caller holds hdev->lock */ -void hci_pend_le_conn_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) +void hci_conn_params_clear_all(struct hci_dev *hdev) { - struct bdaddr_list *entry; - - entry = hci_pend_le_conn_lookup(hdev, addr, addr_type); - if (entry) - goto done; + struct hci_conn_params *params, *tmp; - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - BT_ERR("Out of memory"); - return; + list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) { + list_del(¶ms->action); + list_del(¶ms->list); + kfree(params); } - bacpy(&entry->bdaddr, addr); - entry->bdaddr_type = addr_type; - - list_add(&entry->list, &hdev->pend_le_conns); - - BT_DBG("addr %pMR (type %u)", addr, addr_type); - -done: - hci_update_background_scan(hdev); -} - -/* This function requires the caller holds hdev->lock */ -void hci_pend_le_conn_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) -{ - struct bdaddr_list *entry; - - entry = hci_pend_le_conn_lookup(hdev, addr, addr_type); - if (!entry) - goto done; - - list_del(&entry->list); - kfree(entry); - - BT_DBG("addr %pMR (type %u)", addr, addr_type); - -done: hci_update_background_scan(hdev); -} -/* This function requires the caller holds hdev->lock */ -void hci_pend_le_conns_clear(struct hci_dev *hdev) -{ - struct bdaddr_list *entry, *tmp; - - list_for_each_entry_safe(entry, tmp, &hdev->pend_le_conns, list) { - list_del(&entry->list); - kfree(entry); - } - - BT_DBG("All LE pending connections cleared"); + BT_DBG("All LE connection parameters were removed"); } static void inquiry_complete(struct hci_dev *hdev, u8 status) @@ -3663,7 +3854,7 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) * In this kind of scenario skip the update and let the random * address be updated at the next cycle. */ - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags) || + if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { BT_DBG("Deferring random address update"); return; @@ -3725,7 +3916,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * the HCI command if the current random address is already the * static one. */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) || + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || !bacmp(&hdev->bdaddr, BDADDR_ANY)) { *own_addr_type = ADDR_LE_DEV_RANDOM; if (bacmp(&hdev->static_addr, &hdev->random_addr)) @@ -3754,7 +3945,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type) { - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) || + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || !bacmp(&hdev->bdaddr, BDADDR_ANY)) { bacpy(bdaddr, &hdev->static_addr); *bdaddr_type = ADDR_LE_DEV_RANDOM; @@ -3769,7 +3960,7 @@ struct hci_dev *hci_alloc_dev(void) { struct hci_dev *hdev; - hdev = kzalloc(sizeof(struct hci_dev), GFP_KERNEL); + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); if (!hdev) return NULL; @@ -3778,6 +3969,7 @@ struct hci_dev *hci_alloc_dev(void) hdev->link_mode = (HCI_LM_ACCEPT); hdev->num_iac = 0x01; /* One IAC support is mandatory */ hdev->io_capability = 0x03; /* No Input No Output */ + hdev->manufacturer = 0xffff; /* Default to internal use */ hdev->inq_tx_power = HCI_TX_POWER_INVALID; hdev->adv_tx_power = HCI_TX_POWER_INVALID; @@ -3785,18 +3977,26 @@ struct hci_dev *hci_alloc_dev(void) hdev->sniff_min_interval = 80; hdev->le_adv_channel_map = 0x07; + hdev->le_adv_min_interval = 0x0800; + hdev->le_adv_max_interval = 0x0800; hdev->le_scan_interval = 0x0060; hdev->le_scan_window = 0x0030; hdev->le_conn_min_interval = 0x0028; hdev->le_conn_max_interval = 0x0038; + hdev->le_conn_latency = 0x0000; + hdev->le_supv_timeout = 0x002a; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; + hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; + hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE; + hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE; mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->blacklist); + INIT_LIST_HEAD(&hdev->whitelist); INIT_LIST_HEAD(&hdev->uuids); INIT_LIST_HEAD(&hdev->link_keys); INIT_LIST_HEAD(&hdev->long_term_keys); @@ -3805,6 +4005,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_LIST_HEAD(&hdev->le_white_list); INIT_LIST_HEAD(&hdev->le_conn_params); INIT_LIST_HEAD(&hdev->pend_le_conns); + INIT_LIST_HEAD(&hdev->pend_le_reports); INIT_LIST_HEAD(&hdev->conn_hash.list); INIT_WORK(&hdev->rx_work, hci_rx_work); @@ -3822,7 +4023,7 @@ struct hci_dev *hci_alloc_dev(void) init_waitqueue_head(&hdev->req_wait_q); - setup_timer(&hdev->cmd_timer, hci_cmd_timeout, (unsigned long) hdev); + INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); hci_init_sysfs(hdev); discovery_init(hdev); @@ -3844,7 +4045,7 @@ int hci_register_dev(struct hci_dev *hdev) { int id, error; - if (!hdev->open || !hdev->close) + if (!hdev->open || !hdev->close || !hdev->send) return -EINVAL; /* Do not allow HCI_AMP devices to register at index 0, @@ -3929,6 +4130,12 @@ int hci_register_dev(struct hci_dev *hdev) list_add(&hdev->list, &hci_dev_list); write_unlock(&hci_dev_list_lock); + /* Devices that are marked for raw-only usage are unconfigured + * and should not be included in normal operation. + */ + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + set_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); @@ -3971,7 +4178,8 @@ void hci_unregister_dev(struct hci_dev *hdev) cancel_work_sync(&hdev->power_on); if (!test_bit(HCI_INIT, &hdev->flags) && - !test_bit(HCI_SETUP, &hdev->dev_flags)) { + !test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_CONFIG, &hdev->dev_flags)) { hci_dev_lock(hdev); mgmt_index_removed(hdev); hci_dev_unlock(hdev); @@ -3999,15 +4207,15 @@ void hci_unregister_dev(struct hci_dev *hdev) destroy_workqueue(hdev->req_workqueue); hci_dev_lock(hdev); - hci_blacklist_clear(hdev); + hci_bdaddr_list_clear(&hdev->blacklist); + hci_bdaddr_list_clear(&hdev->whitelist); hci_uuids_clear(hdev); hci_link_keys_clear(hdev); hci_smp_ltks_clear(hdev); hci_smp_irks_clear(hdev); hci_remote_oob_data_clear(hdev); - hci_white_list_clear(hdev); - hci_conn_params_clear(hdev); - hci_pend_le_conns_clear(hdev); + hci_bdaddr_list_clear(&hdev->le_white_list); + hci_conn_params_clear_all(hdev); hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -4245,6 +4453,8 @@ EXPORT_SYMBOL(hci_unregister_cb); static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { + int err; + BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); /* Time stamp */ @@ -4261,8 +4471,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); - if (hdev->send(hdev, skb) < 0) - BT_ERR("%s sending frame failed", hdev->name); + err = hdev->send(hdev, skb); + if (err < 0) { + BT_ERR("%s sending frame failed (%d)", hdev->name, err); + kfree_skb(skb); + } } void hci_req_init(struct hci_request *req, struct hci_dev *hdev) @@ -4304,6 +4517,11 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete) return 0; } +bool hci_req_pending(struct hci_dev *hdev) +{ + return (hdev->req_status == HCI_REQ_PEND); +} + static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param) { @@ -4736,7 +4954,7 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) static void __check_timeout(struct hci_dev *hdev, unsigned int cnt) { - if (!test_bit(HCI_RAW, &hdev->flags)) { + if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { /* ACL tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!cnt && time_after(jiffies, hdev->acl_last_tx + @@ -4919,7 +5137,7 @@ static void hci_sched_le(struct hci_dev *hdev) if (!hci_conn_num(hdev, LE_LINK)) return; - if (!test_bit(HCI_RAW, &hdev->flags)) { + if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { /* LE tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!hdev->le_cnt && hdev->le_pkts && @@ -5164,8 +5382,7 @@ static void hci_rx_work(struct work_struct *work) hci_send_to_sock(hdev, skb); } - if (test_bit(HCI_RAW, &hdev->flags) || - test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { kfree_skb(skb); continue; } @@ -5225,10 +5442,10 @@ static void hci_cmd_work(struct work_struct *work) atomic_dec(&hdev->cmd_cnt); hci_send_frame(hdev, skb); if (test_bit(HCI_RESET, &hdev->flags)) - del_timer(&hdev->cmd_timer); + cancel_delayed_work(&hdev->cmd_timer); else - mod_timer(&hdev->cmd_timer, - jiffies + HCI_CMD_TIMEOUT); + schedule_delayed_work(&hdev->cmd_timer, + HCI_CMD_TIMEOUT); } else { skb_queue_head(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -5245,26 +5462,135 @@ void hci_req_add_le_scan_disable(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } +static void add_to_white_list(struct hci_request *req, + struct hci_conn_params *params) +{ + struct hci_cp_le_add_to_white_list cp; + + cp.bdaddr_type = params->addr_type; + bacpy(&cp.bdaddr, ¶ms->addr); + + hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); +} + +static u8 update_white_list(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + struct bdaddr_list *b; + uint8_t white_list_entries = 0; + + /* Go through the current white list programmed into the + * controller one by one and check if that address is still + * in the list of pending connections or list of devices to + * report. If not present in either list, then queue the + * command to remove it from the controller. + */ + list_for_each_entry(b, &hdev->le_white_list, list) { + struct hci_cp_le_del_from_white_list cp; + + if (hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, b->bdaddr_type) || + hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, b->bdaddr_type)) { + white_list_entries++; + continue; + } + + cp.bdaddr_type = b->bdaddr_type; + bacpy(&cp.bdaddr, &b->bdaddr); + + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, + sizeof(cp), &cp); + } + + /* Since all no longer valid white list entries have been + * removed, walk through the list of pending connections + * and ensure that any new device gets programmed into + * the controller. + * + * If the list of the devices is larger than the list of + * available white list entries in the controller, then + * just abort and return filer policy value to not use the + * white list. + */ + list_for_each_entry(params, &hdev->pend_le_conns, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + ¶ms->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* After adding all new pending connections, walk through + * the list of pending reports and also add these to the + * white list if there is still space. + */ + list_for_each_entry(params, &hdev->pend_le_reports, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + ¶ms->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* Select filter policy to use white list */ + return 0x01; +} + void hci_req_add_le_passive_scan(struct hci_request *req) { struct hci_cp_le_set_scan_param param_cp; struct hci_cp_le_set_scan_enable enable_cp; struct hci_dev *hdev = req->hdev; u8 own_addr_type; + u8 filter_policy; - /* Set require_privacy to true to avoid identification from - * unknown peer devices. Since this is passive scanning, no - * SCAN_REQ using the local identity should be sent. Mandating - * privacy is just an extra precaution. + /* Set require_privacy to false since no SCAN_REQ are send + * during passive scanning. Not using an unresolvable address + * here is important so that peer devices using direct + * advertising with our address will be correctly reported + * by the controller. */ - if (hci_update_random_address(req, true, &own_addr_type)) + if (hci_update_random_address(req, false, &own_addr_type)) return; + /* Adding or removing entries from the white list must + * happen before enabling scanning. The controller does + * not allow white list modification while scanning. + */ + filter_policy = update_white_list(req); + memset(¶m_cp, 0, sizeof(param_cp)); param_cp.type = LE_SCAN_PASSIVE; param_cp.interval = cpu_to_le16(hdev->le_scan_interval); param_cp.window = cpu_to_le16(hdev->le_scan_window); param_cp.own_address_type = own_addr_type; + param_cp.filter_policy = filter_policy; hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), ¶m_cp); @@ -5294,11 +5620,29 @@ void hci_update_background_scan(struct hci_dev *hdev) struct hci_conn *conn; int err; + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags) || + test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || + test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + hci_req_init(&req, hdev); - if (list_empty(&hdev->pend_le_conns)) { - /* If there is no pending LE connections, we should stop - * the background scanning. + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)) { + /* If there is no pending LE connections or devices + * to be scanned for, we should stop the background + * scanning. */ /* If controller is not scanning we are done. */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 49774912cb01..be35598984d9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -32,6 +32,7 @@ #include "a2mp.h" #include "amp.h" +#include "smp.h" /* Handle HCI Event packets */ @@ -45,9 +46,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) return; clear_bit(HCI_INQUIRY, &hdev->flags); - smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ + smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */ wake_up_bit(&hdev->flags, HCI_INQUIRY); + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + hci_conn_check_pending(hdev); } @@ -96,12 +101,8 @@ static void hci_cc_role_discovery(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); - if (conn) { - if (rp->role) - conn->link_mode &= ~HCI_LM_MASTER; - else - conn->link_mode |= HCI_LM_MASTER; - } + if (conn) + conn->role = rp->role; hci_dev_unlock(hdev); } @@ -170,12 +171,14 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_LINK_POLICY); if (!sent) return; - if (!status) - hdev->link_policy = get_unaligned_le16(sent); + hdev->link_policy = get_unaligned_le16(sent); } static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) @@ -265,28 +268,30 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); + __u8 param; void *sent; BT_DBG("%s status 0x%2.2x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_ENCRYPT_MODE); if (!sent) return; - if (!status) { - __u8 param = *((__u8 *) sent); + param = *((__u8 *) sent); - if (param) - set_bit(HCI_ENCRYPT, &hdev->flags); - else - clear_bit(HCI_ENCRYPT, &hdev->flags); - } + if (param) + set_bit(HCI_ENCRYPT, &hdev->flags); + else + clear_bit(HCI_ENCRYPT, &hdev->flags); } static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { - __u8 param, status = *((__u8 *) skb->data); - int old_pscan, old_iscan; + __u8 status = *((__u8 *) skb->data); + __u8 param; void *sent; BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -300,32 +305,19 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); if (status) { - mgmt_write_scan_failed(hdev, param, status); hdev->discov_timeout = 0; goto done; } - /* We need to ensure that we set this back on if someone changed - * the scan mode through a raw HCI socket. - */ - set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); - - old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags); - old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags); - - if (param & SCAN_INQUIRY) { + if (param & SCAN_INQUIRY) set_bit(HCI_ISCAN, &hdev->flags); - if (!old_iscan) - mgmt_discoverable(hdev, 1); - } else if (old_iscan) - mgmt_discoverable(hdev, 0); + else + clear_bit(HCI_ISCAN, &hdev->flags); - if (param & SCAN_PAGE) { + if (param & SCAN_PAGE) set_bit(HCI_PSCAN, &hdev->flags); - if (!old_pscan) - mgmt_connectable(hdev, 1); - } else if (old_pscan) - mgmt_connectable(hdev, 0); + else + clear_bit(HCI_PSCAN, &hdev->flags); done: hci_dev_unlock(hdev); @@ -597,8 +589,10 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) - hdev->flow_ctl_mode = rp->mode; + if (rp->status) + return; + + hdev->flow_ctl_mode = rp->mode; } static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) @@ -633,8 +627,14 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) + if (rp->status) + return; + + if (test_bit(HCI_INIT, &hdev->flags)) bacpy(&hdev->bdaddr, &rp->bdaddr); + + if (test_bit(HCI_SETUP, &hdev->dev_flags)) + bacpy(&hdev->setup_addr, &rp->bdaddr); } static void hci_cc_read_page_scan_activity(struct hci_dev *hdev, @@ -644,7 +644,10 @@ static void hci_cc_read_page_scan_activity(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) { + if (rp->status) + return; + + if (test_bit(HCI_INIT, &hdev->flags)) { hdev->page_scan_interval = __le16_to_cpu(rp->interval); hdev->page_scan_window = __le16_to_cpu(rp->window); } @@ -676,7 +679,10 @@ static void hci_cc_read_page_scan_type(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) + if (rp->status) + return; + + if (test_bit(HCI_INIT, &hdev->flags)) hdev->page_scan_type = rp->type; } @@ -716,6 +722,41 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev, hdev->block_cnt, hdev->block_len); } +static void hci_cc_read_clock(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_clock *rp = (void *) skb->data; + struct hci_cp_read_clock *cp; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + if (skb->len < sizeof(*rp)) + return; + + if (rp->status) + return; + + hci_dev_lock(hdev); + + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_CLOCK); + if (!cp) + goto unlock; + + if (cp->which == 0x00) { + hdev->clock = le32_to_cpu(rp->clock); + goto unlock; + } + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) { + conn->clock = le32_to_cpu(rp->clock); + conn->clock_accuracy = le16_to_cpu(rp->accuracy); + } + +unlock: + hci_dev_unlock(hdev); +} + static void hci_cc_read_local_amp_info(struct hci_dev *hdev, struct sk_buff *skb) { @@ -785,8 +826,10 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) - hdev->inq_tx_power = rp->tx_power; + if (rp->status) + return; + + hdev->inq_tx_power = rp->tx_power; } static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -857,8 +900,10 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) - memcpy(hdev->le_features, rp->features, 8); + if (rp->status) + return; + + memcpy(hdev->le_features, rp->features, 8); } static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, @@ -868,8 +913,10 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) - hdev->adv_tx_power = rp->tx_power; + if (rp->status) + return; + + hdev->adv_tx_power = rp->tx_power; } static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -969,14 +1016,16 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_RANDOM_ADDR); if (!sent) return; hci_dev_lock(hdev); - if (!status) - bacpy(&hdev->random_addr, sent); + bacpy(&hdev->random_addr, sent); hci_dev_unlock(hdev); } @@ -987,14 +1036,31 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_ENABLE); if (!sent) return; hci_dev_lock(hdev); - if (!status) - mgmt_advertising(hdev, *sent); + /* If we're doing connection initation as peripheral. Set a + * timeout in case something goes wrong. + */ + if (*sent) { + struct hci_conn *conn; + + set_bit(HCI_LE_ADV, &hdev->dev_flags); + + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + queue_delayed_work(hdev->workqueue, + &conn->le_conn_timeout, + conn->conn_timeout); + } else { + clear_bit(HCI_LE_ADV, &hdev->dev_flags); + } hci_dev_unlock(hdev); } @@ -1006,18 +1072,49 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); + if (status) + return; + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_PARAM); if (!cp) return; hci_dev_lock(hdev); - if (!status) - hdev->le_scan_type = cp->type; + hdev->le_scan_type = cp->type; hci_dev_unlock(hdev); } +static bool has_pending_adv_report(struct hci_dev *hdev) +{ + struct discovery_state *d = &hdev->discovery; + + return bacmp(&d->last_adv_addr, BDADDR_ANY); +} + +static void clear_pending_adv_report(struct hci_dev *hdev) +{ + struct discovery_state *d = &hdev->discovery; + + bacpy(&d->last_adv_addr, BDADDR_ANY); + d->last_adv_data_len = 0; +} + +static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 bdaddr_type, s8 rssi, u32 flags, + u8 *data, u8 len) +{ + struct discovery_state *d = &hdev->discovery; + + bacpy(&d->last_adv_addr, bdaddr); + d->last_adv_addr_type = bdaddr_type; + d->last_adv_rssi = rssi; + d->last_adv_flags = flags; + memcpy(d->last_adv_data, data, len); + d->last_adv_data_len = len; +} + static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1026,32 +1123,56 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, status); - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_ENABLE); - if (!cp) + if (status) return; - if (status) + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_ENABLE); + if (!cp) return; switch (cp->enable) { case LE_SCAN_ENABLE: set_bit(HCI_LE_SCAN, &hdev->dev_flags); + if (hdev->le_scan_type == LE_SCAN_ACTIVE) + clear_pending_adv_report(hdev); break; case LE_SCAN_DISABLE: + /* We do this here instead of when setting DISCOVERY_STOPPED + * since the latter would potentially require waiting for + * inquiry to stop too. + */ + if (has_pending_adv_report(hdev)) { + struct discovery_state *d = &hdev->discovery; + + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, + d->last_adv_rssi, d->last_adv_flags, + d->last_adv_data, + d->last_adv_data_len, NULL, 0); + } + /* Cancel this timer so that we don't try to disable scanning * when it's already disabled. */ cancel_delayed_work(&hdev->le_scan_disable); clear_bit(HCI_LE_SCAN, &hdev->dev_flags); + /* The HCI_LE_SCAN_INTERRUPTED flag indicates that we * interrupted scanning due to a connect request. Mark - * therefore discovery as stopped. + * therefore discovery as stopped. If this was not + * because of a connect request advertising might have + * been disabled because of active scanning, so + * re-enable it again if necessary. */ if (test_and_clear_bit(HCI_LE_SCAN_INTERRUPTED, &hdev->dev_flags)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + else if (!test_bit(HCI_LE_ADV, &hdev->dev_flags) && + hdev->discovery.state == DISCOVERY_FINDING) + mgmt_reenable_advertising(hdev); + break; default: @@ -1067,8 +1188,10 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size); - if (!rp->status) - hdev->le_white_list_size = rp->size; + if (rp->status) + return; + + hdev->le_white_list_size = rp->size; } static void hci_cc_le_clear_white_list(struct hci_dev *hdev, @@ -1078,8 +1201,10 @@ static void hci_cc_le_clear_white_list(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (!status) - hci_white_list_clear(hdev); + if (status) + return; + + hci_bdaddr_list_clear(&hdev->le_white_list); } static void hci_cc_le_add_to_white_list(struct hci_dev *hdev, @@ -1090,12 +1215,15 @@ static void hci_cc_le_add_to_white_list(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_WHITE_LIST); if (!sent) return; - if (!status) - hci_white_list_add(hdev, &sent->bdaddr, sent->bdaddr_type); + hci_bdaddr_list_add(&hdev->le_white_list, &sent->bdaddr, + sent->bdaddr_type); } static void hci_cc_le_del_from_white_list(struct hci_dev *hdev, @@ -1106,12 +1234,15 @@ static void hci_cc_le_del_from_white_list(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_WHITE_LIST); if (!sent) return; - if (!status) - hci_white_list_del(hdev, &sent->bdaddr, sent->bdaddr_type); + hci_bdaddr_list_del(&hdev->le_white_list, &sent->bdaddr, + sent->bdaddr_type); } static void hci_cc_le_read_supported_states(struct hci_dev *hdev, @@ -1121,8 +1252,10 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) - memcpy(hdev->le_states, rp->le_states, 8); + if (rp->status) + return; + + memcpy(hdev->le_states, rp->le_states, 8); } static void hci_cc_write_le_host_supported(struct hci_dev *hdev, @@ -1133,25 +1266,26 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, status); + if (status) + return; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED); if (!sent) return; - if (!status) { - if (sent->le) { - hdev->features[1][0] |= LMP_HOST_LE; - set_bit(HCI_LE_ENABLED, &hdev->dev_flags); - } else { - hdev->features[1][0] &= ~LMP_HOST_LE; - clear_bit(HCI_LE_ENABLED, &hdev->dev_flags); - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); - } - - if (sent->simul) - hdev->features[1][0] |= LMP_HOST_LE_BREDR; - else - hdev->features[1][0] &= ~LMP_HOST_LE_BREDR; + if (sent->le) { + hdev->features[1][0] |= LMP_HOST_LE; + set_bit(HCI_LE_ENABLED, &hdev->dev_flags); + } else { + hdev->features[1][0] &= ~LMP_HOST_LE; + clear_bit(HCI_LE_ENABLED, &hdev->dev_flags); + clear_bit(HCI_ADVERTISING, &hdev->dev_flags); } + + if (sent->simul) + hdev->features[1][0] |= LMP_HOST_LE_BREDR; + else + hdev->features[1][0] &= ~LMP_HOST_LE_BREDR; } static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb) @@ -1187,6 +1321,59 @@ static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, amp_write_rem_assoc_continue(hdev, rp->phy_handle); } +static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_rssi *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) + conn->rssi = rp->rssi; + + hci_dev_unlock(hdev); +} + +static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_cp_read_tx_power *sent; + struct hci_rp_read_tx_power *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER); + if (!sent) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (!conn) + goto unlock; + + switch (sent->type) { + case 0x00: + conn->tx_power = rp->tx_power; + break; + case 0x01: + conn->max_tx_power = rp->tx_power; + break; + } + +unlock: + hci_dev_unlock(hdev); +} + static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -1227,11 +1414,9 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) } } else { if (!conn) { - conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr); - if (conn) { - conn->out = true; - conn->link_mode |= HCI_LM_MASTER; - } else + conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr, + HCI_ROLE_MASTER); + if (!conn) BT_ERR("No memory for new connection"); } } @@ -1342,6 +1527,7 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev, * is requested. */ if (!hci_conn_ssp_enabled(conn) && !(conn->auth_type & 0x01) && + conn->pending_sec_level != BT_SECURITY_FIPS && conn->pending_sec_level != BT_SECURITY_HIGH && conn->pending_sec_level != BT_SECURITY_MEDIUM) return 0; @@ -1459,6 +1645,8 @@ static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_auth_requested auth_cp; + set_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags); + auth_cp.handle = __cpu_to_le16(conn->handle); hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(auth_cp), &auth_cp); @@ -1719,7 +1907,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) if (cp->filter_policy == HCI_LE_USE_PEER_ADDR) queue_delayed_work(conn->hdev->workqueue, &conn->le_conn_timeout, - HCI_LE_CONN_TIMEOUT); + conn->conn_timeout); unlock: hci_dev_unlock(hdev); @@ -1768,7 +1956,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) return; - smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ + smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */ wake_up_bit(&hdev->flags, HCI_INQUIRY); if (!test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -1813,7 +2001,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); for (; num_rsp; num_rsp--, info++) { - bool name_known, ssp; + u32 flags; bacpy(&data.bdaddr, &info->bdaddr); data.pscan_rep_mode = info->pscan_rep_mode; @@ -1824,10 +2012,10 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) data.rssi = 0x00; data.ssp_mode = 0x00; - name_known = hci_inquiry_cache_update(hdev, &data, false, &ssp); + flags = hci_inquiry_cache_update(hdev, &data, false); + mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, - info->dev_class, 0, !name_known, ssp, NULL, - 0); + info->dev_class, 0, flags, NULL, 0, NULL, 0); } hci_dev_unlock(hdev); @@ -1872,10 +2060,10 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) - conn->link_mode |= HCI_LM_AUTH; + set_bit(HCI_CONN_AUTH, &conn->flags); if (test_bit(HCI_ENCRYPT, &hdev->flags)) - conn->link_mode |= HCI_LM_ENCRYPT; + set_bit(HCI_CONN_ENCRYPT, &conn->flags); /* Get remote features */ if (conn->type == ACL_LINK) { @@ -1915,10 +2103,21 @@ unlock: hci_conn_check_pending(hdev); } +static void hci_reject_conn(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct hci_cp_reject_conn_req cp; + + bacpy(&cp.bdaddr, bdaddr); + cp.reason = HCI_ERROR_REJ_BAD_ADDR; + hci_send_cmd(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp); +} + static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_request *ev = (void *) skb->data; int mask = hdev->link_mode; + struct inquiry_entry *ie; + struct hci_conn *conn; __u8 flags = 0; BT_DBG("%s bdaddr %pMR type 0x%x", hdev->name, &ev->bdaddr, @@ -1927,73 +2126,79 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type, &flags); - if ((mask & HCI_LM_ACCEPT) && - !hci_blacklist_lookup(hdev, &ev->bdaddr, BDADDR_BREDR)) { - /* Connection accepted */ - struct inquiry_entry *ie; - struct hci_conn *conn; + if (!(mask & HCI_LM_ACCEPT)) { + hci_reject_conn(hdev, &ev->bdaddr); + return; + } - hci_dev_lock(hdev); + if (hci_bdaddr_list_lookup(&hdev->blacklist, &ev->bdaddr, + BDADDR_BREDR)) { + hci_reject_conn(hdev, &ev->bdaddr); + return; + } - ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); - if (ie) - memcpy(ie->data.dev_class, ev->dev_class, 3); + if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags) && + !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr, + BDADDR_BREDR)) { + hci_reject_conn(hdev, &ev->bdaddr); + return; + } + + /* Connection accepted */ - conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, - &ev->bdaddr); + hci_dev_lock(hdev); + + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); + if (ie) + memcpy(ie->data.dev_class, ev->dev_class, 3); + + conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, + &ev->bdaddr); + if (!conn) { + conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, + HCI_ROLE_SLAVE); if (!conn) { - conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr); - if (!conn) { - BT_ERR("No memory for new connection"); - hci_dev_unlock(hdev); - return; - } + BT_ERR("No memory for new connection"); + hci_dev_unlock(hdev); + return; } + } - memcpy(conn->dev_class, ev->dev_class, 3); + memcpy(conn->dev_class, ev->dev_class, 3); - hci_dev_unlock(hdev); + hci_dev_unlock(hdev); - if (ev->link_type == ACL_LINK || - (!(flags & HCI_PROTO_DEFER) && !lmp_esco_capable(hdev))) { - struct hci_cp_accept_conn_req cp; - conn->state = BT_CONNECT; + if (ev->link_type == ACL_LINK || + (!(flags & HCI_PROTO_DEFER) && !lmp_esco_capable(hdev))) { + struct hci_cp_accept_conn_req cp; + conn->state = BT_CONNECT; - bacpy(&cp.bdaddr, &ev->bdaddr); + bacpy(&cp.bdaddr, &ev->bdaddr); - if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) - cp.role = 0x00; /* Become master */ - else - cp.role = 0x01; /* Remain slave */ + if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) + cp.role = 0x00; /* Become master */ + else + cp.role = 0x01; /* Remain slave */ - hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), - &cp); - } else if (!(flags & HCI_PROTO_DEFER)) { - struct hci_cp_accept_sync_conn_req cp; - conn->state = BT_CONNECT; + hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); + } else if (!(flags & HCI_PROTO_DEFER)) { + struct hci_cp_accept_sync_conn_req cp; + conn->state = BT_CONNECT; - bacpy(&cp.bdaddr, &ev->bdaddr); - cp.pkt_type = cpu_to_le16(conn->pkt_type); + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.pkt_type = cpu_to_le16(conn->pkt_type); - cp.tx_bandwidth = cpu_to_le32(0x00001f40); - cp.rx_bandwidth = cpu_to_le32(0x00001f40); - cp.max_latency = cpu_to_le16(0xffff); - cp.content_format = cpu_to_le16(hdev->voice_setting); - cp.retrans_effort = 0xff; + cp.tx_bandwidth = cpu_to_le32(0x00001f40); + cp.rx_bandwidth = cpu_to_le32(0x00001f40); + cp.max_latency = cpu_to_le16(0xffff); + cp.content_format = cpu_to_le16(hdev->voice_setting); + cp.retrans_effort = 0xff; - hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, - sizeof(cp), &cp); - } else { - conn->state = BT_CONNECT2; - hci_proto_connect_cfm(conn, 0); - } + hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, sizeof(cp), + &cp); } else { - /* Connection rejected */ - struct hci_cp_reject_conn_req cp; - - bacpy(&cp.bdaddr, &ev->bdaddr); - cp.reason = HCI_ERROR_REJ_BAD_ADDR; - hci_send_cmd(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp); + conn->state = BT_CONNECT2; + hci_proto_connect_cfm(conn, 0); } } @@ -2042,7 +2247,8 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) mgmt_device_disconnected(hdev, &conn->dst, conn->type, conn->dst_type, reason, mgmt_connected); - if (conn->type == ACL_LINK && conn->flush_key) + if (conn->type == ACL_LINK && + test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); @@ -2053,8 +2259,11 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) break; /* Fall through */ + case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: - hci_pend_le_conn_add(hdev, &conn->dst, conn->dst_type); + list_del_init(¶ms->action); + list_add(¶ms->action, &hdev->pend_le_conns); + hci_update_background_scan(hdev); break; default: @@ -2102,7 +2311,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) { BT_INFO("re-auth of legacy device is not possible."); } else { - conn->link_mode |= HCI_LM_AUTH; + set_bit(HCI_CONN_AUTH, &conn->flags); conn->sec_level = conn->pending_sec_level; } } else { @@ -2181,6 +2390,9 @@ check_auth: if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_auth_requested cp; + + set_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags); + cp.handle = __cpu_to_le16(conn->handle); hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); } @@ -2205,19 +2417,19 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!ev->status) { if (ev->encrypt) { /* Encryption implies authentication */ - conn->link_mode |= HCI_LM_AUTH; - conn->link_mode |= HCI_LM_ENCRYPT; + set_bit(HCI_CONN_AUTH, &conn->flags); + set_bit(HCI_CONN_ENCRYPT, &conn->flags); conn->sec_level = conn->pending_sec_level; /* P-256 authentication key implies FIPS */ if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256) - conn->link_mode |= HCI_LM_FIPS; + set_bit(HCI_CONN_FIPS, &conn->flags); if ((conn->type == ACL_LINK && ev->encrypt == 0x02) || conn->type == LE_LINK) set_bit(HCI_CONN_AES_CCM, &conn->flags); } else { - conn->link_mode &= ~HCI_LM_ENCRYPT; + clear_bit(HCI_CONN_ENCRYPT, &conn->flags); clear_bit(HCI_CONN_AES_CCM, &conn->flags); } } @@ -2268,7 +2480,7 @@ static void hci_change_link_key_complete_evt(struct hci_dev *hdev, conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) - conn->link_mode |= HCI_LM_SECURE; + set_bit(HCI_CONN_SECURE, &conn->flags); clear_bit(HCI_CONN_AUTH_PEND, &conn->flags); @@ -2479,6 +2691,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_local_amp_info(hdev, skb); break; + case HCI_OP_READ_CLOCK: + hci_cc_read_clock(hdev, skb); + break; + case HCI_OP_READ_LOCAL_AMP_ASSOC: hci_cc_read_local_amp_assoc(hdev, skb); break; @@ -2579,13 +2795,21 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_write_remote_amp_assoc(hdev, skb); break; + case HCI_OP_READ_RSSI: + hci_cc_read_rssi(hdev, skb); + break; + + case HCI_OP_READ_TX_POWER: + hci_cc_read_tx_power(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; } if (opcode != HCI_OP_NOP) - del_timer(&hdev->cmd_timer); + cancel_delayed_work(&hdev->cmd_timer); hci_req_cmd_complete(hdev, opcode, status); @@ -2676,7 +2900,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) } if (opcode != HCI_OP_NOP) - del_timer(&hdev->cmd_timer); + cancel_delayed_work(&hdev->cmd_timer); if (ev->status || (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) @@ -2700,12 +2924,8 @@ static void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (conn) { - if (!ev->status) { - if (ev->role) - conn->link_mode &= ~HCI_LM_MASTER; - else - conn->link_mode |= HCI_LM_MASTER; - } + if (!ev->status) + conn->role = ev->role; clear_bit(HCI_CONN_RSWITCH_PEND, &conn->flags); @@ -2899,10 +3119,11 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_drop(conn); } - if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags)) + if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) && + !test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags)) { hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(ev->bdaddr), &ev->bdaddr); - else if (test_bit(HCI_MGMT, &hdev->dev_flags)) { + } else if (test_bit(HCI_MGMT, &hdev->dev_flags)) { u8 secure; if (conn->pending_sec_level == BT_SECURITY_HIGH) @@ -2941,12 +3162,6 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s found key type %u for %pMR", hdev->name, key->type, &ev->bdaddr); - if (!test_bit(HCI_DEBUG_KEYS, &hdev->dev_flags) && - key->type == HCI_LK_DEBUG_COMBINATION) { - BT_DBG("%s ignoring debug key", hdev->name); - goto not_found; - } - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (conn) { if ((key->type == HCI_LK_UNAUTH_COMBINATION_P192 || @@ -2957,7 +3172,8 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) } if (key->type == HCI_LK_COMBINATION && key->pin_len < 16 && - conn->pending_sec_level == BT_SECURITY_HIGH) { + (conn->pending_sec_level == BT_SECURITY_HIGH || + conn->pending_sec_level == BT_SECURITY_FIPS)) { BT_DBG("%s ignoring key unauthenticated for high security", hdev->name); goto not_found; @@ -2985,6 +3201,8 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_link_key_notify *ev = (void *) skb->data; struct hci_conn *conn; + struct link_key *key; + bool persistent; u8 pin_len = 0; BT_DBG("%s", hdev->name); @@ -3003,10 +3221,33 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_drop(conn); } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - hci_add_link_key(hdev, conn, 1, &ev->bdaddr, ev->link_key, - ev->key_type, pin_len); + if (!test_bit(HCI_MGMT, &hdev->dev_flags)) + goto unlock; + + key = hci_add_link_key(hdev, conn, &ev->bdaddr, ev->link_key, + ev->key_type, pin_len, &persistent); + if (!key) + goto unlock; + + mgmt_new_link_key(hdev, key, persistent); + /* Keep debug keys around only if the HCI_KEEP_DEBUG_KEYS flag + * is set. If it's not set simply remove the key from the kernel + * list (we've still notified user space about it but with + * store_hint being 0). + */ + if (key->type == HCI_LK_DEBUG_COMBINATION && + !test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags)) { + list_del(&key->list); + kfree(key); + } else if (conn) { + if (persistent) + clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags); + else + set_bit(HCI_CONN_FLUSH_KEY, &conn->flags); + } + +unlock: hci_dev_unlock(hdev); } @@ -3072,7 +3313,6 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, { struct inquiry_data data; int num_rsp = *((__u8 *) skb->data); - bool name_known, ssp; BT_DBG("%s num_rsp %d", hdev->name, num_rsp); @@ -3089,6 +3329,8 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, info = (void *) (skb->data + 1); for (; num_rsp; num_rsp--, info++) { + u32 flags; + bacpy(&data.bdaddr, &info->bdaddr); data.pscan_rep_mode = info->pscan_rep_mode; data.pscan_period_mode = info->pscan_period_mode; @@ -3098,16 +3340,18 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, data.rssi = info->rssi; data.ssp_mode = 0x00; - name_known = hci_inquiry_cache_update(hdev, &data, - false, &ssp); + flags = hci_inquiry_cache_update(hdev, &data, false); + mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - !name_known, ssp, NULL, 0); + flags, NULL, 0, NULL, 0); } } else { struct inquiry_info_with_rssi *info = (void *) (skb->data + 1); for (; num_rsp; num_rsp--, info++) { + u32 flags; + bacpy(&data.bdaddr, &info->bdaddr); data.pscan_rep_mode = info->pscan_rep_mode; data.pscan_period_mode = info->pscan_period_mode; @@ -3116,11 +3360,12 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, data.clock_offset = info->clock_offset; data.rssi = info->rssi; data.ssp_mode = 0x00; - name_known = hci_inquiry_cache_update(hdev, &data, - false, &ssp); + + flags = hci_inquiry_cache_update(hdev, &data, false); + mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - !name_known, ssp, NULL, 0); + flags, NULL, 0, NULL, 0); } } @@ -3223,6 +3468,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, hci_conn_add_sysfs(conn); break; + case 0x10: /* Connection Accept Timeout */ case 0x0d: /* Connection Rejected due to Limited Resources */ case 0x11: /* Unsupported Feature or Parameter Value */ case 0x1c: /* SCO interval rejected */ @@ -3286,7 +3532,8 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, hci_dev_lock(hdev); for (; num_rsp; num_rsp--, info++) { - bool name_known, ssp; + u32 flags; + bool name_known; bacpy(&data.bdaddr, &info->bdaddr); data.pscan_rep_mode = info->pscan_rep_mode; @@ -3304,12 +3551,13 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, else name_known = true; - name_known = hci_inquiry_cache_update(hdev, &data, name_known, - &ssp); + flags = hci_inquiry_cache_update(hdev, &data, name_known); + eir_len = eir_get_length(info->data, sizeof(info->data)); + mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, - info->dev_class, info->rssi, !name_known, - ssp, info->data, eir_len); + info->dev_class, info->rssi, + flags, info->data, eir_len, NULL, 0); } hci_dev_unlock(hdev); @@ -3330,6 +3578,12 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, if (!conn) goto unlock; + /* For BR/EDR the necessary steps are taken through the + * auth_complete event. + */ + if (conn->type != LE_LINK) + goto unlock; + if (!ev->status) conn->sec_level = conn->pending_sec_level; @@ -3361,24 +3615,20 @@ unlock: static u8 hci_get_auth_req(struct hci_conn *conn) { - /* If remote requests dedicated bonding follow that lead */ - if (conn->remote_auth == HCI_AT_DEDICATED_BONDING || - conn->remote_auth == HCI_AT_DEDICATED_BONDING_MITM) { - /* If both remote and local IO capabilities allow MITM - * protection then require it, otherwise don't */ - if (conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT || - conn->io_capability == HCI_IO_NO_INPUT_OUTPUT) - return HCI_AT_DEDICATED_BONDING; - else - return HCI_AT_DEDICATED_BONDING_MITM; - } - /* If remote requests no-bonding follow that lead */ if (conn->remote_auth == HCI_AT_NO_BONDING || conn->remote_auth == HCI_AT_NO_BONDING_MITM) return conn->remote_auth | (conn->auth_type & 0x01); - return conn->auth_type; + /* If both remote and local have enough IO capabilities, require + * MITM protection + */ + if (conn->remote_cap != HCI_IO_NO_INPUT_OUTPUT && + conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) + return conn->remote_auth | 0x01; + + /* No MITM protection possible so ignore remote requirement */ + return (conn->remote_auth & ~0x01) | (conn->auth_type & 0x01); } static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -3399,7 +3649,11 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!test_bit(HCI_MGMT, &hdev->dev_flags)) goto unlock; - if (test_bit(HCI_PAIRABLE, &hdev->dev_flags) || + /* Allow pairing if we're pairable, the initiators of the + * pairing or if the remote is not requesting bonding. + */ + if (test_bit(HCI_BONDABLE, &hdev->dev_flags) || + test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags) || (conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) { struct hci_cp_io_capability_reply cp; @@ -3408,7 +3662,25 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) * to DisplayYesNo as it is not supported by BT spec. */ cp.capability = (conn->io_capability == 0x04) ? HCI_IO_DISPLAY_YESNO : conn->io_capability; - conn->auth_type = hci_get_auth_req(conn); + + /* If we are initiators, there is no remote information yet */ + if (conn->remote_auth == 0xff) { + /* Request MITM protection if our IO caps allow it + * except for the no-bonding case. + */ + if (conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && + conn->auth_type != HCI_AT_NO_BONDING) + conn->auth_type |= 0x01; + } else { + conn->auth_type = hci_get_auth_req(conn); + } + + /* If we're not bondable, force one of the non-bondable + * authentication requirement values. + */ + if (!test_bit(HCI_BONDABLE, &hdev->dev_flags)) + conn->auth_type &= HCI_AT_NO_BONDING_MITM; + cp.authentication = conn->auth_type; if (hci_find_remote_oob_data(hdev, &conn->dst) && @@ -3478,10 +3750,10 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, /* If we require MITM but the remote device can't provide that * (it has NoInputNoOutput) then reject the confirmation - * request. The only exception is when we're dedicated bonding - * initiators (connect_cfm_cb set) since then we always have the MITM - * bit set. */ - if (!conn->connect_cfm_cb && loc_mitm && + * request. We check the security level here since it doesn't + * necessarily match conn->auth_type. + */ + if (conn->pending_sec_level > BT_SECURITY_MEDIUM && conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) { BT_DBG("Rejecting request: remote device can't provide MITM"); hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY, @@ -3495,8 +3767,13 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, /* If we're not the initiators request authorization to * proceed from user space (mgmt_user_confirm with - * confirm_hint set to 1). */ - if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { + * confirm_hint set to 1). The exception is if neither + * side had MITM or if the local IO capability is + * NoInputNoOutput, in which case we do auto-accept + */ + if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && + conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && + (loc_mitm || rem_mitm)) { BT_DBG("Confirming auto-accept as acceptor"); confirm_hint = 1; goto confirm; @@ -3609,6 +3886,9 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, if (!conn) goto unlock; + /* Reset the authentication requirement to unknown */ + conn->remote_auth = 0xff; + /* To avoid duplicate auth_failed events to user space we check * the HCI_CONN_AUTH_PEND flag which will be set if we * initiated the authentication. A traditional auth_complete @@ -3823,16 +4103,23 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_le_conn_complete *ev = (void *) skb->data; + struct hci_conn_params *params; struct hci_conn *conn; struct smp_irk *irk; + u8 addr_type; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); hci_dev_lock(hdev); + /* All controllers implicitly stop advertising in the event of a + * connection, so ensure that the state bit is cleared. + */ + clear_bit(HCI_LE_ADV, &hdev->dev_flags); + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); if (!conn) { - conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); + conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role); if (!conn) { BT_ERR("No memory for new connection"); goto unlock; @@ -3840,22 +4127,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->dst_type = ev->bdaddr_type; - /* The advertising parameters for own address type - * define which source address and source address - * type this connections has. - */ - if (bacmp(&conn->src, BDADDR_ANY)) { - conn->src_type = ADDR_LE_DEV_PUBLIC; - } else { - bacpy(&conn->src, &hdev->static_addr); - conn->src_type = ADDR_LE_DEV_RANDOM; - } - - if (ev->role == LE_CONN_ROLE_MASTER) { - conn->out = true; - conn->link_mode |= HCI_LM_MASTER; - } - /* If we didn't have a hci_conn object previously * but we're in master role this must be something * initiated using a white list. Since white list based @@ -3875,27 +4146,32 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) &conn->init_addr, &conn->init_addr_type); } - } else { - /* Set the responder (our side) address type based on - * the advertising address type. - */ - conn->resp_addr_type = hdev->adv_addr_type; - if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) - bacpy(&conn->resp_addr, &hdev->random_addr); - else - bacpy(&conn->resp_addr, &hdev->bdaddr); - - conn->init_addr_type = ev->bdaddr_type; - bacpy(&conn->init_addr, &ev->bdaddr); } } else { cancel_delayed_work(&conn->le_conn_timeout); } - /* Ensure that the hci_conn contains the identity address type - * regardless of which address the connection was made with. - */ - hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + if (!conn->out) { + /* Set the responder (our side) address type based on + * the advertising address type. + */ + conn->resp_addr_type = hdev->adv_addr_type; + if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) + bacpy(&conn->resp_addr, &hdev->random_addr); + else + bacpy(&conn->resp_addr, &hdev->bdaddr); + + conn->init_addr_type = ev->bdaddr_type; + bacpy(&conn->init_addr, &ev->bdaddr); + + /* For incoming connections, set the default minimum + * and maximum connection interval. They will be used + * to check if the parameters are in range and if not + * trigger the connection update procedure. + */ + conn->le_conn_min_interval = hdev->le_conn_min_interval; + conn->le_conn_max_interval = hdev->le_conn_max_interval; + } /* Lookup the identity address from the stored connection * address and address type. @@ -3912,11 +4188,22 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->dst_type = irk->addr_type; } + if (conn->dst_type == ADDR_LE_DEV_PUBLIC) + addr_type = BDADDR_LE_PUBLIC; + else + addr_type = BDADDR_LE_RANDOM; + if (ev->status) { hci_le_conn_failed(conn, ev->status); goto unlock; } + /* Drop the connection if the device is blocked */ + if (hci_bdaddr_list_lookup(&hdev->blacklist, &conn->dst, addr_type)) { + hci_conn_drop(conn); + goto unlock; + } + if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) mgmt_device_connected(hdev, &conn->dst, conn->type, conn->dst_type, 0, NULL, 0, NULL); @@ -3925,40 +4212,98 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; - if (test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags)) - set_bit(HCI_CONN_6LOWPAN, &conn->flags); + conn->le_conn_interval = le16_to_cpu(ev->interval); + conn->le_conn_latency = le16_to_cpu(ev->latency); + conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); - hci_pend_le_conn_del(hdev, &conn->dst, conn->dst_type); + params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); + if (params) + list_del_init(¶ms->action); unlock: + hci_update_background_scan(hdev); + hci_dev_unlock(hdev); +} + +static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_le_conn_update_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); + + if (ev->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + conn->le_conn_interval = le16_to_cpu(ev->interval); + conn->le_conn_latency = le16_to_cpu(ev->latency); + conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout); + } + hci_dev_unlock(hdev); } /* This function requires the caller holds hdev->lock */ static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, - u8 addr_type) + u8 addr_type, u8 adv_type) { struct hci_conn *conn; - struct smp_irk *irk; + struct hci_conn_params *params; - /* If this is a resolvable address, we should resolve it and then - * update address and address type variables. + /* If the event is not connectable don't proceed further */ + if (adv_type != LE_ADV_IND && adv_type != LE_ADV_DIRECT_IND) + return; + + /* Ignore if the device is blocked */ + if (hci_bdaddr_list_lookup(&hdev->blacklist, addr, addr_type)) + return; + + /* Most controller will fail if we try to create new connections + * while we have an existing one in slave role. */ - irk = hci_get_irk(hdev, addr, addr_type); - if (irk) { - addr = &irk->bdaddr; - addr_type = irk->addr_type; - } + if (hdev->conn_hash.le_num_slave > 0) + return; - if (!hci_pend_le_conn_lookup(hdev, addr, addr_type)) + /* If we're not connectable only connect devices that we have in + * our pend_le_conns list. + */ + params = hci_pend_le_action_lookup(&hdev->pend_le_conns, + addr, addr_type); + if (!params) return; + switch (params->auto_connect) { + case HCI_AUTO_CONN_DIRECT: + /* Only devices advertising with ADV_DIRECT_IND are + * triggering a connection attempt. This is allowing + * incoming connections from slave devices. + */ + if (adv_type != LE_ADV_DIRECT_IND) + return; + break; + case HCI_AUTO_CONN_ALWAYS: + /* Devices advertising with ADV_IND or ADV_DIRECT_IND + * are triggering a connection attempt. This means + * that incoming connectioms from slave device are + * accepted and also outgoing connections to slave + * devices are established when found. + */ + break; + default: + return; + } + conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW, - HCI_AT_NO_BONDING); + HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER); if (!IS_ERR(conn)) return; @@ -3975,25 +4320,144 @@ static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, } } +static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, + u8 bdaddr_type, s8 rssi, u8 *data, u8 len) +{ + struct discovery_state *d = &hdev->discovery; + struct smp_irk *irk; + bool match; + u32 flags; + + /* Check if we need to convert to identity address */ + irk = hci_get_irk(hdev, bdaddr, bdaddr_type); + if (irk) { + bdaddr = &irk->bdaddr; + bdaddr_type = irk->addr_type; + } + + /* Check if we have been requested to connect to this device */ + check_pending_le_conn(hdev, bdaddr, bdaddr_type, type); + + /* Passive scanning shouldn't trigger any device found events, + * except for devices marked as CONN_REPORT for which we do send + * device found events. + */ + if (hdev->le_scan_type == LE_SCAN_PASSIVE) { + if (type == LE_ADV_DIRECT_IND) + return; + + if (!hci_pend_le_action_lookup(&hdev->pend_le_reports, + bdaddr, bdaddr_type)) + return; + + if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND) + flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; + else + flags = 0; + mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, + rssi, flags, data, len, NULL, 0); + return; + } + + /* When receiving non-connectable or scannable undirected + * advertising reports, this means that the remote device is + * not connectable and then clearly indicate this in the + * device found event. + * + * When receiving a scan response, then there is no way to + * know if the remote device is connectable or not. However + * since scan responses are merged with a previously seen + * advertising report, the flags field from that report + * will be used. + * + * In the really unlikely case that a controller get confused + * and just sends a scan response event, then it is marked as + * not connectable as well. + */ + if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND || + type == LE_ADV_SCAN_RSP) + flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; + else + flags = 0; + + /* If there's nothing pending either store the data from this + * event or send an immediate device found event if the data + * should not be stored for later. + */ + if (!has_pending_adv_report(hdev)) { + /* If the report will trigger a SCAN_REQ store it for + * later merging. + */ + if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + store_pending_adv_report(hdev, bdaddr, bdaddr_type, + rssi, flags, data, len); + return; + } + + mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, + rssi, flags, data, len, NULL, 0); + return; + } + + /* Check if the pending report is for the same device as the new one */ + match = (!bacmp(bdaddr, &d->last_adv_addr) && + bdaddr_type == d->last_adv_addr_type); + + /* If the pending data doesn't match this report or this isn't a + * scan response (e.g. we got a duplicate ADV_IND) then force + * sending of the pending data. + */ + if (type != LE_ADV_SCAN_RSP || !match) { + /* Send out whatever is in the cache, but skip duplicates */ + if (!match) + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, + d->last_adv_rssi, d->last_adv_flags, + d->last_adv_data, + d->last_adv_data_len, NULL, 0); + + /* If the new report will trigger a SCAN_REQ store it for + * later merging. + */ + if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + store_pending_adv_report(hdev, bdaddr, bdaddr_type, + rssi, flags, data, len); + return; + } + + /* The advertising reports cannot be merged, so clear + * the pending report and send out a device found event. + */ + clear_pending_adv_report(hdev); + mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, + rssi, flags, data, len, NULL, 0); + return; + } + + /* If we get here we've got a pending ADV_IND or ADV_SCAN_IND and + * the new event is a SCAN_RSP. We can therefore proceed with + * sending a merged device found event. + */ + mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, + d->last_adv_addr_type, NULL, rssi, d->last_adv_flags, + d->last_adv_data, d->last_adv_data_len, data, len); + clear_pending_adv_report(hdev); +} + static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) { u8 num_reports = skb->data[0]; void *ptr = &skb->data[1]; - s8 rssi; hci_dev_lock(hdev); while (num_reports--) { struct hci_ev_le_advertising_info *ev = ptr; - - if (ev->evt_type == LE_ADV_IND || - ev->evt_type == LE_ADV_DIRECT_IND) - check_pending_le_conn(hdev, &ev->bdaddr, - ev->bdaddr_type); + s8 rssi; rssi = ev->data[ev->length]; - mgmt_device_found(hdev, &ev->bdaddr, LE_LINK, ev->bdaddr_type, - NULL, rssi, 0, 1, ev->data, ev->length); + process_adv_report(hdev, ev->evt_type, &ev->bdaddr, + ev->bdaddr_type, rssi, ev->data, ev->length); ptr += sizeof(*ev) + ev->length + 1; } @@ -4017,7 +4481,7 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn == NULL) goto not_found; - ltk = hci_find_ltk(hdev, ev->ediv, ev->rand, conn->out); + ltk = hci_find_ltk(hdev, ev->ediv, ev->rand, conn->role); if (ltk == NULL) goto not_found; @@ -4039,9 +4503,12 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) * distribute the keys. Later, security can be re-established * using a distributed LTK. */ - if (ltk->type == HCI_SMP_STK_SLAVE) { + if (ltk->type == SMP_STK) { + set_bit(HCI_CONN_STK_ENCRYPT, &conn->flags); list_del(<k->list); kfree(ltk); + } else { + clear_bit(HCI_CONN_STK_ENCRYPT, &conn->flags); } hci_dev_unlock(hdev); @@ -4054,6 +4521,76 @@ not_found: hci_dev_unlock(hdev); } +static void send_conn_param_neg_reply(struct hci_dev *hdev, u16 handle, + u8 reason) +{ + struct hci_cp_le_conn_param_req_neg_reply cp; + + cp.handle = cpu_to_le16(handle); + cp.reason = reason; + + hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_NEG_REPLY, sizeof(cp), + &cp); +} + +static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_le_remote_conn_param_req *ev = (void *) skb->data; + struct hci_cp_le_conn_param_req_reply cp; + struct hci_conn *hcon; + u16 handle, min, max, latency, timeout; + + handle = le16_to_cpu(ev->handle); + min = le16_to_cpu(ev->interval_min); + max = le16_to_cpu(ev->interval_max); + latency = le16_to_cpu(ev->latency); + timeout = le16_to_cpu(ev->timeout); + + hcon = hci_conn_hash_lookup_handle(hdev, handle); + if (!hcon || hcon->state != BT_CONNECTED) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_UNKNOWN_CONN_ID); + + if (hci_check_conn_params(min, max, latency, timeout)) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + + if (hcon->role == HCI_ROLE_MASTER) { + struct hci_conn_params *params; + u8 store_hint; + + hci_dev_lock(hdev); + + params = hci_conn_params_lookup(hdev, &hcon->dst, + hcon->dst_type); + if (params) { + params->conn_min_interval = min; + params->conn_max_interval = max; + params->conn_latency = latency; + params->supervision_timeout = timeout; + store_hint = 0x01; + } else{ + store_hint = 0x00; + } + + hci_dev_unlock(hdev); + + mgmt_new_conn_param(hdev, &hcon->dst, hcon->dst_type, + store_hint, min, max, latency, timeout); + } + + cp.handle = ev->handle; + cp.interval_min = ev->interval_min; + cp.interval_max = ev->interval_max; + cp.latency = ev->latency; + cp.timeout = ev->timeout; + cp.min_ce_len = 0; + cp.max_ce_len = 0; + + hci_send_cmd(hdev, HCI_OP_LE_CONN_PARAM_REQ_REPLY, sizeof(cp), &cp); +} + static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_le_meta *le_ev = (void *) skb->data; @@ -4065,6 +4602,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_le_conn_complete_evt(hdev, skb); break; + case HCI_EV_LE_CONN_UPDATE_COMPLETE: + hci_le_conn_update_complete_evt(hdev, skb); + break; + case HCI_EV_LE_ADVERTISING_REPORT: hci_le_adv_report_evt(hdev, skb); break; @@ -4073,6 +4614,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_le_ltk_request_evt(hdev, skb); break; + case HCI_EV_LE_REMOTE_CONN_PARAM_REQ: + hci_le_remote_conn_param_req_evt(hdev, skb); + break; + default: break; } @@ -4104,7 +4649,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) /* Received events are (currently) only needed when a request is * ongoing so avoid unnecessary memory allocation. */ - if (hdev->req_status == HCI_REQ_PEND) { + if (hci_req_pending(hdev)) { kfree_skb(hdev->recv_evt); hdev->recv_evt = skb_clone(skb, GFP_KERNEL); } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b9a418e578e0..115f149362ba 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -35,13 +35,32 @@ static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ +/* Socket info */ +#define hci_pi(sk) ((struct hci_pinfo *) sk) + +struct hci_pinfo { + struct bt_sock bt; + struct hci_dev *hdev; + struct hci_filter filter; + __u32 cmsg_mask; + unsigned short channel; +}; + static inline int hci_test_bit(int nr, void *addr) { return *((__u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); } /* Security filter */ -static struct hci_sec_filter hci_sec_filter = { +#define HCI_SFLT_MAX_OGF 5 + +struct hci_sec_filter { + __u32 type_mask; + __u32 event_mask[2]; + __u32 ocf_mask[HCI_SFLT_MAX_OGF + 1][4]; +}; + +static const struct hci_sec_filter hci_sec_filter = { /* Packet types */ 0x10, /* Events */ @@ -143,7 +162,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (!skb_copy) { /* Create a private copy with headroom */ - skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true); if (!skb_copy) continue; @@ -247,8 +266,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) struct hci_mon_hdr *hdr; /* Create a private copy with headroom */ - skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, + GFP_ATOMIC, true); if (!skb_copy) continue; @@ -481,7 +500,7 @@ static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg) hci_dev_lock(hdev); - err = hci_blacklist_add(hdev, &bdaddr, BDADDR_BREDR); + err = hci_bdaddr_list_add(&hdev->blacklist, &bdaddr, BDADDR_BREDR); hci_dev_unlock(hdev); @@ -498,7 +517,7 @@ static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg) hci_dev_lock(hdev); - err = hci_blacklist_del(hdev, &bdaddr, BDADDR_BREDR); + err = hci_bdaddr_list_del(&hdev->blacklist, &bdaddr, BDADDR_BREDR); hci_dev_unlock(hdev); @@ -517,6 +536,9 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) return -EBUSY; + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + return -EOPNOTSUPP; + if (hdev->dev_type != HCI_BREDR) return -EOPNOTSUPP; @@ -524,16 +546,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, case HCISETRAW: if (!capable(CAP_NET_ADMIN)) return -EPERM; - - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) - return -EPERM; - - if (arg) - set_bit(HCI_RAW, &hdev->flags); - else - clear_bit(HCI_RAW, &hdev->flags); - - return 0; + return -EOPNOTSUPP; case HCIGETCONNINFO: return hci_get_conn_info(hdev, (void __user *) arg); @@ -699,7 +712,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags)) { + test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags)) { err = -EBUSY; hci_dev_put(hdev); goto done; @@ -969,7 +983,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } - if (test_bit(HCI_RAW, &hdev->flags) || (ogf == 0x3f)) { + if (ogf == 0x3f) { skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else { diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 8181ea4bc2f2..6c7ecf116e74 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -154,7 +154,7 @@ static int hidp_input_event(struct input_dev *dev, unsigned int type, (!!test_bit(LED_COMPOSE, dev->led) << 3) | (!!test_bit(LED_SCROLLL, dev->led) << 2) | (!!test_bit(LED_CAPSL, dev->led) << 1) | - (!!test_bit(LED_NUML, dev->led)); + (!!test_bit(LED_NUML, dev->led) << 0); if (session->leds == newleds) return 0; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a1e5bb7d06e8..46547b920f88 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -40,14 +40,13 @@ #include "smp.h" #include "a2mp.h" #include "amp.h" -#include "6lowpan.h" #define LE_FLOWCTL_MAX_CREDITS 65535 bool disable_ertm; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD; -static u8 l2cap_fixed_chan[8] = { L2CAP_FC_L2CAP | L2CAP_FC_CONNLESS, }; +static u8 l2cap_fixed_chan[8] = { L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS, }; static LIST_HEAD(chan_list); static DEFINE_RWLOCK(chan_list_lock); @@ -205,6 +204,7 @@ done: write_unlock(&chan_list_lock); return err; } +EXPORT_SYMBOL_GPL(l2cap_add_psm); int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid) { @@ -437,6 +437,7 @@ struct l2cap_chan *l2cap_chan_create(void) return chan; } +EXPORT_SYMBOL_GPL(l2cap_chan_create); static void l2cap_chan_destroy(struct kref *kref) { @@ -464,6 +465,7 @@ void l2cap_chan_put(struct l2cap_chan *c) kref_put(&c->kref, l2cap_chan_destroy); } +EXPORT_SYMBOL_GPL(l2cap_chan_put); void l2cap_chan_set_defaults(struct l2cap_chan *chan) { @@ -471,11 +473,18 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) chan->max_tx = L2CAP_DEFAULT_MAX_TX; chan->tx_win = L2CAP_DEFAULT_TX_WINDOW; chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; + chan->remote_max_tx = chan->max_tx; + chan->remote_tx_win = chan->tx_win; chan->ack_win = L2CAP_DEFAULT_TX_WINDOW; chan->sec_level = BT_SECURITY_LOW; + chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; + chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; + chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; + chan->conf_state = 0; set_bit(FLAG_FORCE_ACTIVE, &chan->flags); } +EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults); static void l2cap_le_flowctl_init(struct l2cap_chan *chan) { @@ -608,6 +617,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) return; } +EXPORT_SYMBOL_GPL(l2cap_chan_del); void l2cap_conn_update_id_addr(struct hci_conn *hcon) { @@ -711,6 +721,7 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) break; } } +EXPORT_SYMBOL(l2cap_chan_close); static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan) { @@ -764,7 +775,7 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan) } /* Service level security */ -int l2cap_chan_check_security(struct l2cap_chan *chan) +int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator) { struct l2cap_conn *conn = chan->conn; __u8 auth_type; @@ -774,7 +785,8 @@ int l2cap_chan_check_security(struct l2cap_chan *chan) auth_type = l2cap_get_auth_type(chan); - return hci_conn_security(conn->hcon, chan->sec_level, auth_type); + return hci_conn_security(conn->hcon, chan->sec_level, auth_type, + initiator); } static u8 l2cap_get_ident(struct l2cap_conn *conn) @@ -787,14 +799,14 @@ static u8 l2cap_get_ident(struct l2cap_conn *conn) * 200 - 254 are used by utilities like l2ping, etc. */ - spin_lock(&conn->lock); + mutex_lock(&conn->ident_lock); if (++conn->tx_ident > 128) conn->tx_ident = 1; id = conn->tx_ident; - spin_unlock(&conn->lock); + mutex_unlock(&conn->ident_lock); return id; } @@ -1267,7 +1279,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) return; - if (l2cap_chan_check_security(chan) && + if (l2cap_chan_check_security(chan, true) && __l2cap_no_conn_pending(chan)) { l2cap_start_connection(chan); } @@ -1346,7 +1358,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) } if (chan->state == BT_CONNECT) { - if (!l2cap_chan_check_security(chan) || + if (!l2cap_chan_check_security(chan, true) || !__l2cap_no_conn_pending(chan)) { l2cap_chan_unlock(chan); continue; @@ -1368,7 +1380,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); - if (l2cap_chan_check_security(chan)) { + if (l2cap_chan_check_security(chan, false)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { rsp.result = cpu_to_le16(L2CAP_CR_PEND); rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND); @@ -1449,13 +1461,12 @@ static struct l2cap_chan *l2cap_global_chan_by_scid(int state, u16 cid, static void l2cap_le_conn_ready(struct l2cap_conn *conn) { struct hci_conn *hcon = conn->hcon; + struct hci_dev *hdev = hcon->hdev; struct l2cap_chan *chan, *pchan; u8 dst_type; BT_DBG(""); - bt_6lowpan_add_conn(conn); - /* Check if we have socket listening on cid */ pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_ATT, &hcon->src, &hcon->dst); @@ -1469,9 +1480,28 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) dst_type = bdaddr_type(hcon, hcon->dst_type); /* If device is blocked, do not create a channel for it */ - if (hci_blacklist_lookup(hcon->hdev, &hcon->dst, dst_type)) + if (hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type)) return; + /* For LE slave connections, make sure the connection interval + * is in the range of the minium and maximum interval that has + * been configured for this connection. If not, then trigger + * the connection update procedure. + */ + if (hcon->role == HCI_ROLE_SLAVE && + (hcon->le_conn_interval < hcon->le_conn_min_interval || + hcon->le_conn_interval > hcon->le_conn_max_interval)) { + struct l2cap_conn_param_update_req req; + + req.min = cpu_to_le16(hcon->le_conn_min_interval); + req.max = cpu_to_le16(hcon->le_conn_max_interval); + req.latency = cpu_to_le16(hcon->le_conn_latency); + req.to_multiplier = cpu_to_le16(hcon->le_supv_timeout); + + l2cap_send_cmd(conn, l2cap_get_ident(conn), + L2CAP_CONN_PARAM_UPDATE_REQ, sizeof(req), &req); + } + l2cap_chan_lock(pchan); chan = pchan->ops->new_connection(pchan); @@ -1657,7 +1687,13 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) kfree_skb(conn->rx_skb); skb_queue_purge(&conn->pending_rx); - flush_work(&conn->pending_rx_work); + + /* We can not call flush_work(&conn->pending_rx_work) here since we + * might block if we are running on a worker from the same workqueue + * pending_rx_work is waiting on. + */ + if (work_pending(&conn->pending_rx_work)) + cancel_work_sync(&conn->pending_rx_work); l2cap_unregister_all_users(conn); @@ -2106,7 +2142,8 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan, struct sk_buff **frag; int sent = 0; - if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) + if (chan->ops->memcpy_fromiovec(chan, skb_put(skb, count), + msg->msg_iov, count)) return -EFAULT; sent += count; @@ -2119,18 +2156,17 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan, count = min_t(unsigned int, conn->mtu, len); - tmp = chan->ops->alloc_skb(chan, count, + tmp = chan->ops->alloc_skb(chan, 0, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(tmp)) return PTR_ERR(tmp); *frag = tmp; - if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count)) + if (chan->ops->memcpy_fromiovec(chan, skb_put(*frag, count), + msg->msg_iov, count)) return -EFAULT; - (*frag)->priority = skb->priority; - sent += count; len -= count; @@ -2144,26 +2180,23 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan, } static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, - struct msghdr *msg, size_t len, - u32 priority) + struct msghdr *msg, size_t len) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; int err, count, hlen = L2CAP_HDR_SIZE + L2CAP_PSMLEN_SIZE; struct l2cap_hdr *lh; - BT_DBG("chan %p psm 0x%2.2x len %zu priority %u", chan, - __le16_to_cpu(chan->psm), len, priority); + BT_DBG("chan %p psm 0x%2.2x len %zu", chan, + __le16_to_cpu(chan->psm), len); count = min_t(unsigned int, (conn->mtu - hlen), len); - skb = chan->ops->alloc_skb(chan, count + hlen, + skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; - skb->priority = priority; - /* Create L2CAP header */ lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); @@ -2179,8 +2212,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, } static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, - struct msghdr *msg, size_t len, - u32 priority) + struct msghdr *msg, size_t len) { struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; @@ -2191,13 +2223,11 @@ static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, count = min_t(unsigned int, (conn->mtu - L2CAP_HDR_SIZE), len); - skb = chan->ops->alloc_skb(chan, count + L2CAP_HDR_SIZE, + skb = chan->ops->alloc_skb(chan, L2CAP_HDR_SIZE, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; - skb->priority = priority; - /* Create L2CAP header */ lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); @@ -2235,7 +2265,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, count = min_t(unsigned int, (conn->mtu - hlen), len); - skb = chan->ops->alloc_skb(chan, count + hlen, + skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; @@ -2356,7 +2386,7 @@ static struct sk_buff *l2cap_create_le_flowctl_pdu(struct l2cap_chan *chan, count = min_t(unsigned int, (conn->mtu - hlen), len); - skb = chan->ops->alloc_skb(chan, count + hlen, + skb = chan->ops->alloc_skb(chan, hlen, count, msg->msg_flags & MSG_DONTWAIT); if (IS_ERR(skb)) return skb; @@ -2418,8 +2448,7 @@ static int l2cap_segment_le_sdu(struct l2cap_chan *chan, return 0; } -int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, - u32 priority) +int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) { struct sk_buff *skb; int err; @@ -2430,7 +2459,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, /* Connectionless channel */ if (chan->chan_type == L2CAP_CHAN_CONN_LESS) { - skb = l2cap_create_connless_pdu(chan, msg, len, priority); + skb = l2cap_create_connless_pdu(chan, msg, len); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -2487,7 +2516,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, return -EMSGSIZE; /* Create a basic PDU */ - skb = l2cap_create_basic_pdu(chan, msg, len, priority); + skb = l2cap_create_basic_pdu(chan, msg, len); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -2550,6 +2579,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, return err; } +EXPORT_SYMBOL_GPL(l2cap_chan_send); static void l2cap_send_srej(struct l2cap_chan *chan, u16 txseq) { @@ -3205,6 +3235,9 @@ done: switch (chan->mode) { case L2CAP_MODE_BASIC: + if (disable_ertm) + break; + if (!(chan->conn->feat_mask & L2CAP_FEAT_ERTM) && !(chan->conn->feat_mask & L2CAP_FEAT_STREAMING)) break; @@ -3817,7 +3850,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, chan->ident = cmd->ident; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) { - if (l2cap_chan_check_security(chan)) { + if (l2cap_chan_check_security(chan, false)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { l2cap_state_change(chan, BT_CONNECT2); result = L2CAP_CR_PEND; @@ -5185,27 +5218,6 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, return 0; } -static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency, - u16 to_multiplier) -{ - u16 max_latency; - - if (min > max || min < 6 || max > 3200) - return -EINVAL; - - if (to_multiplier < 10 || to_multiplier > 3200) - return -EINVAL; - - if (max >= to_multiplier * 8) - return -EINVAL; - - max_latency = (to_multiplier * 8 / max) - 1; - if (latency > 499 || latency > max_latency) - return -EINVAL; - - return 0; -} - static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) @@ -5216,7 +5228,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, u16 min, max, latency, to_multiplier; int err; - if (!(hcon->link_mode & HCI_LM_MASTER)) + if (hcon->role != HCI_ROLE_MASTER) return -EINVAL; if (cmd_len != sizeof(struct l2cap_conn_param_update_req)) @@ -5233,7 +5245,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - err = l2cap_check_conn_param(min, max, latency, to_multiplier); + err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else @@ -5242,8 +5254,16 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, sizeof(rsp), &rsp); - if (!err) - hci_le_conn_update(hcon, min, max, latency, to_multiplier); + if (!err) { + u8 store_hint; + + store_hint = hci_le_conn_update(hcon, min, max, latency, + to_multiplier); + mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type, + store_hint, min, max, latency, + to_multiplier); + + } return 0; } @@ -6867,9 +6887,6 @@ static void l2cap_att_channel(struct l2cap_conn *conn, BT_DBG("chan %p, len %d", chan, skb->len); - if (hci_blacklist_lookup(hcon->hdev, &hcon->dst, hcon->dst_type)) - goto drop; - if (chan->imtu < skb->len) goto drop; @@ -6902,6 +6919,16 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) return; } + /* Since we can't actively block incoming LE connections we must + * at least ensure that we ignore incoming data from them. + */ + if (hcon->type == LE_LINK && + hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst, + bdaddr_type(hcon, hcon->dst_type))) { + kfree_skb(skb); + return; + } + BT_DBG("len %d, cid 0x%4.4x", len, cid); switch (cid) { @@ -6928,10 +6955,6 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) l2cap_conn_del(conn->hcon, EACCES); break; - case L2CAP_FC_6LOWPAN: - bt_6lowpan_recv(conn, skb); - break; - default: l2cap_data_channel(conn, cid, skb); break; @@ -6962,7 +6985,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) if (!hchan) return NULL; - conn = kzalloc(sizeof(struct l2cap_conn), GFP_KERNEL); + conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) { hci_chan_del(hchan); return NULL; @@ -6994,7 +7017,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->hs_enabled = test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags); - spin_lock_init(&conn->lock); + mutex_init(&conn->ident_lock); mutex_init(&conn->chan_lock); INIT_LIST_HEAD(&conn->chan_l); @@ -7030,7 +7053,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, struct l2cap_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; - __u8 auth_type; int err; BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst, @@ -7072,7 +7094,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, break; /* fall through */ default: - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto done; } @@ -7106,9 +7128,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, chan->psm = psm; chan->dcid = cid; - auth_type = l2cap_get_auth_type(chan); - if (bdaddr_type_is_le(dst_type)) { + u8 role; + /* Convert from L2CAP channel address type to HCI address type */ if (dst_type == BDADDR_LE_PUBLIC) @@ -7116,9 +7138,15 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, else dst_type = ADDR_LE_DEV_RANDOM; + if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + role = HCI_ROLE_SLAVE; + else + role = HCI_ROLE_MASTER; + hcon = hci_connect_le(hdev, dst, dst_type, chan->sec_level, - auth_type); + HCI_LE_CONN_TIMEOUT, role); } else { + u8 auth_type = l2cap_get_auth_type(chan); hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type); } @@ -7164,7 +7192,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, if (hcon->state == BT_CONNECTED) { if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { __clear_chan_timer(chan); - if (l2cap_chan_check_security(chan)) + if (l2cap_chan_check_security(chan, true)) l2cap_state_change(chan, BT_CONNECTED); } else l2cap_do_start(chan); @@ -7178,6 +7206,7 @@ done: hci_dev_put(hdev); return err; } +EXPORT_SYMBOL_GPL(l2cap_chan_connect); /* ---- L2CAP interface with lower layer (HCI) ---- */ @@ -7240,8 +7269,6 @@ void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); - bt_6lowpan_del_conn(hcon->l2cap_data); - l2cap_conn_del(hcon, bt_to_errno(reason)); } @@ -7519,19 +7546,16 @@ int __init l2cap_init(void) l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs, NULL, &l2cap_debugfs_fops); - debugfs_create_u16("l2cap_le_max_credits", 0466, bt_debugfs, + debugfs_create_u16("l2cap_le_max_credits", 0644, bt_debugfs, &le_max_credits); - debugfs_create_u16("l2cap_le_default_mps", 0466, bt_debugfs, + debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs, &le_default_mps); - bt_6lowpan_init(); - return 0; } void l2cap_exit(void) { - bt_6lowpan_cleanup(); debugfs_remove(l2cap_debugfs); l2cap_cleanup_sockets(); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ef5e5b04f34f..1884f72083c2 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -279,7 +279,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) break; /* fall through */ default: - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto done; } @@ -361,7 +361,8 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, BT_DBG("sock %p, sk %p", sock, sk); if (peer && sk->sk_state != BT_CONNECTED && - sk->sk_state != BT_CONNECT && sk->sk_state != BT_CONNECT2) + sk->sk_state != BT_CONNECT && sk->sk_state != BT_CONNECT2 && + sk->sk_state != BT_CONFIG) return -ENOTCONN; memset(la, 0, sizeof(struct sockaddr_l2)); @@ -787,11 +788,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, /*change security for LE channels */ if (chan->scid == L2CAP_CID_ATT) { - if (!conn->hcon->out) { - err = -EINVAL; - break; - } - if (smp_conn_security(conn->hcon, sec.level)) break; sk->sk_state = BT_CONFIG; @@ -801,7 +797,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, } else if ((sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) || sk->sk_state == BT_CONNECTED) { - if (!l2cap_chan_check_security(chan)) + if (!l2cap_chan_check_security(chan, true)) set_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags); else sk->sk_state_change(sk); @@ -969,7 +965,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err; l2cap_chan_lock(chan); - err = l2cap_chan_send(chan, msg, len, sk->sk_priority); + err = l2cap_chan_send(chan, msg, len); l2cap_chan_unlock(chan); return err; @@ -1116,7 +1112,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) l2cap_chan_close(chan, 0); lock_sock(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } @@ -1180,13 +1177,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { BT_DBG("backlog full %d", parent->sk_ack_backlog); + release_sock(parent); return NULL; } sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); - if (!sk) + if (!sk) { + release_sock(parent); return NULL; + } bt_sock_reclassify_lock(sk, BTPROTO_L2CAP); @@ -1294,6 +1294,7 @@ static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state, } static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan, + unsigned long hdr_len, unsigned long len, int nb) { struct sock *sk = chan->data; @@ -1301,17 +1302,26 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan, int err; l2cap_chan_unlock(chan); - skb = bt_skb_send_alloc(sk, len, nb, &err); + skb = bt_skb_send_alloc(sk, hdr_len + len, nb, &err); l2cap_chan_lock(chan); if (!skb) return ERR_PTR(err); + skb->priority = sk->sk_priority; + bt_cb(skb)->chan = chan; return skb; } +static int l2cap_sock_memcpy_fromiovec_cb(struct l2cap_chan *chan, + unsigned char *kdata, + struct iovec *iov, int len) +{ + return memcpy_fromiovec(kdata, iov, len); +} + static void l2cap_sock_ready_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; @@ -1377,20 +1387,21 @@ static void l2cap_sock_suspend_cb(struct l2cap_chan *chan) sk->sk_state_change(sk); } -static struct l2cap_ops l2cap_chan_ops = { - .name = "L2CAP Socket Interface", - .new_connection = l2cap_sock_new_connection_cb, - .recv = l2cap_sock_recv_cb, - .close = l2cap_sock_close_cb, - .teardown = l2cap_sock_teardown_cb, - .state_change = l2cap_sock_state_change_cb, - .ready = l2cap_sock_ready_cb, - .defer = l2cap_sock_defer_cb, - .resume = l2cap_sock_resume_cb, - .suspend = l2cap_sock_suspend_cb, - .set_shutdown = l2cap_sock_set_shutdown_cb, - .get_sndtimeo = l2cap_sock_get_sndtimeo_cb, - .alloc_skb = l2cap_sock_alloc_skb_cb, +static const struct l2cap_ops l2cap_chan_ops = { + .name = "L2CAP Socket Interface", + .new_connection = l2cap_sock_new_connection_cb, + .recv = l2cap_sock_recv_cb, + .close = l2cap_sock_close_cb, + .teardown = l2cap_sock_teardown_cb, + .state_change = l2cap_sock_state_change_cb, + .ready = l2cap_sock_ready_cb, + .defer = l2cap_sock_defer_cb, + .resume = l2cap_sock_resume_cb, + .suspend = l2cap_sock_suspend_cb, + .set_shutdown = l2cap_sock_set_shutdown_cb, + .get_sndtimeo = l2cap_sock_get_sndtimeo_cb, + .alloc_skb = l2cap_sock_alloc_skb_cb, + .memcpy_fromiovec = l2cap_sock_memcpy_fromiovec_cb, }; static void l2cap_sock_destruct(struct sock *sk) diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index b3fbc73516c4..941ad7530eda 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -58,6 +58,7 @@ int bt_to_errno(__u16 code) return EIO; case 0x04: + case 0x3c: return EHOSTDOWN; case 0x05: diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d2d4e0d5aed0..b8554d429d88 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -29,12 +29,13 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> #include "smp.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 5 +#define MGMT_REVISION 7 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -43,7 +44,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_DISCOVERABLE, MGMT_OP_SET_CONNECTABLE, MGMT_OP_SET_FAST_CONNECTABLE, - MGMT_OP_SET_PAIRABLE, + MGMT_OP_SET_BONDABLE, MGMT_OP_SET_LINK_SECURITY, MGMT_OP_SET_SSP, MGMT_OP_SET_HS, @@ -83,6 +84,15 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_DEBUG_KEYS, MGMT_OP_SET_PRIVACY, MGMT_OP_LOAD_IRKS, + MGMT_OP_GET_CONN_INFO, + MGMT_OP_GET_CLOCK_INFO, + MGMT_OP_ADD_DEVICE, + MGMT_OP_REMOVE_DEVICE, + MGMT_OP_LOAD_CONN_PARAM, + MGMT_OP_READ_UNCONF_INDEX_LIST, + MGMT_OP_READ_CONFIG_INFO, + MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_OP_SET_PUBLIC_ADDRESS, }; static const u16 mgmt_events[] = { @@ -109,6 +119,12 @@ static const u16 mgmt_events[] = { MGMT_EV_PASSKEY_NOTIFY, MGMT_EV_NEW_IRK, MGMT_EV_NEW_CSRK, + MGMT_EV_DEVICE_ADDED, + MGMT_EV_DEVICE_REMOVED, + MGMT_EV_NEW_CONN_PARAM, + MGMT_EV_UNCONF_INDEX_ADDED, + MGMT_EV_UNCONF_INDEX_REMOVED, + MGMT_EV_NEW_CONFIG_OPTIONS, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -198,6 +214,36 @@ static u8 mgmt_status(u8 hci_status) return MGMT_STATUS_FAILED; } +static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len, + struct sock *skip_sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + + skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(event); + if (hdev) + hdr->index = cpu_to_le16(hdev->id); + else + hdr->index = cpu_to_le16(MGMT_INDEX_NONE); + hdr->len = cpu_to_le16(data_len); + + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + /* Time stamp */ + __net_timestamp(skb); + + hci_send_to_control(skb, skip_sk); + kfree_skb(skb); + + return 0; +} + static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { struct sk_buff *skb; @@ -325,7 +371,8 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_BREDR) + if (d->dev_type == HCI_BREDR && + !test_bit(HCI_UNCONFIGURED, &d->dev_flags)) count++; } @@ -338,13 +385,19 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (test_bit(HCI_SETUP, &d->dev_flags)) + if (test_bit(HCI_SETUP, &d->dev_flags) || + test_bit(HCI_CONFIG, &d->dev_flags) || + test_bit(HCI_USER_CHANNEL, &d->dev_flags)) continue; - if (test_bit(HCI_USER_CHANNEL, &d->dev_flags)) + /* Devices marked as raw-only are neither configured + * nor unconfigured controllers. + */ + if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_BREDR) { + if (d->dev_type == HCI_BREDR && + !test_bit(HCI_UNCONFIGURED, &d->dev_flags)) { rp->index[count++] = cpu_to_le16(d->id); BT_DBG("Added hci%u", d->id); } @@ -363,19 +416,151 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, return err; } +static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_unconf_index_list *rp; + struct hci_dev *d; + size_t rp_len; + u16 count; + int err; + + BT_DBG("sock %p", sk); + + read_lock(&hci_dev_list_lock); + + count = 0; + list_for_each_entry(d, &hci_dev_list, list) { + if (d->dev_type == HCI_BREDR && + test_bit(HCI_UNCONFIGURED, &d->dev_flags)) + count++; + } + + rp_len = sizeof(*rp) + (2 * count); + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { + read_unlock(&hci_dev_list_lock); + return -ENOMEM; + } + + count = 0; + list_for_each_entry(d, &hci_dev_list, list) { + if (test_bit(HCI_SETUP, &d->dev_flags) || + test_bit(HCI_CONFIG, &d->dev_flags) || + test_bit(HCI_USER_CHANNEL, &d->dev_flags)) + continue; + + /* Devices marked as raw-only are neither configured + * nor unconfigured controllers. + */ + if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) + continue; + + if (d->dev_type == HCI_BREDR && + test_bit(HCI_UNCONFIGURED, &d->dev_flags)) { + rp->index[count++] = cpu_to_le16(d->id); + BT_DBG("Added hci%u", d->id); + } + } + + rp->num_controllers = cpu_to_le16(count); + rp_len = sizeof(*rp) + (2 * count); + + read_unlock(&hci_dev_list_lock); + + err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_UNCONF_INDEX_LIST, + 0, rp, rp_len); + + kfree(rp); + + return err; +} + +static bool is_configured(struct hci_dev *hdev) +{ + if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && + !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags)) + return false; + + if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) && + !bacmp(&hdev->public_addr, BDADDR_ANY)) + return false; + + return true; +} + +static __le32 get_missing_options(struct hci_dev *hdev) +{ + u32 options = 0; + + if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) && + !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags)) + options |= MGMT_OPTION_EXTERNAL_CONFIG; + + if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) && + !bacmp(&hdev->public_addr, BDADDR_ANY)) + options |= MGMT_OPTION_PUBLIC_ADDRESS; + + return cpu_to_le32(options); +} + +static int new_options(struct hci_dev *hdev, struct sock *skip) +{ + __le32 options = get_missing_options(hdev); + + return mgmt_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, + sizeof(options), skip); +} + +static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) +{ + __le32 options = get_missing_options(hdev); + + return cmd_complete(sk, hdev->id, opcode, 0, &options, + sizeof(options)); +} + +static int read_config_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_config_info rp; + u32 options = 0; + + BT_DBG("sock %p %s", sk, hdev->name); + + hci_dev_lock(hdev); + + memset(&rp, 0, sizeof(rp)); + rp.manufacturer = cpu_to_le16(hdev->manufacturer); + + if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) + options |= MGMT_OPTION_EXTERNAL_CONFIG; + + if (hdev->set_bdaddr) + options |= MGMT_OPTION_PUBLIC_ADDRESS; + + rp.supported_options = cpu_to_le32(options); + rp.missing_options = get_missing_options(hdev); + + hci_dev_unlock(hdev); + + return cmd_complete(sk, hdev->id, MGMT_OP_READ_CONFIG_INFO, 0, &rp, + sizeof(rp)); +} + static u32 get_supported_settings(struct hci_dev *hdev) { u32 settings = 0; settings |= MGMT_SETTING_POWERED; - settings |= MGMT_SETTING_PAIRABLE; + settings |= MGMT_SETTING_BONDABLE; settings |= MGMT_SETTING_DEBUG_KEYS; + settings |= MGMT_SETTING_CONNECTABLE; + settings |= MGMT_SETTING_DISCOVERABLE; if (lmp_bredr_capable(hdev)) { - settings |= MGMT_SETTING_CONNECTABLE; if (hdev->hci_ver >= BLUETOOTH_VER_1_2) settings |= MGMT_SETTING_FAST_CONNECTABLE; - settings |= MGMT_SETTING_DISCOVERABLE; settings |= MGMT_SETTING_BREDR; settings |= MGMT_SETTING_LINK_SECURITY; @@ -385,7 +570,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) } if (lmp_sc_capable(hdev) || - test_bit(HCI_FORCE_SC, &hdev->dev_flags)) + test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) settings |= MGMT_SETTING_SECURE_CONN; } @@ -395,6 +580,10 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_PRIVACY; } + if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || + hdev->set_bdaddr) + settings |= MGMT_SETTING_CONFIGURATION; + return settings; } @@ -414,8 +603,8 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_DISCOVERABLE; - if (test_bit(HCI_PAIRABLE, &hdev->dev_flags)) - settings |= MGMT_SETTING_PAIRABLE; + if (test_bit(HCI_BONDABLE, &hdev->dev_flags)) + settings |= MGMT_SETTING_BONDABLE; if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) settings |= MGMT_SETTING_BREDR; @@ -438,7 +627,7 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) settings |= MGMT_SETTING_SECURE_CONN; - if (test_bit(HCI_DEBUG_KEYS, &hdev->dev_flags)) + if (test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags)) settings |= MGMT_SETTING_DEBUG_KEYS; if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) @@ -569,6 +758,22 @@ static struct pending_cmd *mgmt_pending_find(u16 opcode, struct hci_dev *hdev) return NULL; } +static struct pending_cmd *mgmt_pending_find_data(u16 opcode, + struct hci_dev *hdev, + const void *data) +{ + struct pending_cmd *cmd; + + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + if (cmd->user_data != data) + continue; + if (cmd->opcode == opcode) + return cmd; + } + + return NULL; +} + static u8 create_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { u8 ad_len = 0; @@ -701,6 +906,16 @@ static void update_adv_data(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } +int mgmt_update_adv_data(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + update_adv_data(&req); + + return hci_req_run(&req, NULL); +} + static void create_eir(struct hci_dev *hdev, u8 *data) { u8 *ptr = data; @@ -834,6 +1049,13 @@ static bool get_connectable(struct hci_dev *hdev) return test_bit(HCI_CONNECTABLE, &hdev->dev_flags); } +static void disable_advertising(struct hci_request *req) +{ + u8 enable = 0x00; + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + static void enable_advertising(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -841,12 +1063,18 @@ static void enable_advertising(struct hci_request *req) u8 own_addr_type, enable = 0x01; bool connectable; - /* Clear the HCI_ADVERTISING bit temporarily so that the + if (hci_conn_num(hdev, LE_LINK) > 0) + return; + + if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) + disable_advertising(req); + + /* Clear the HCI_LE_ADV bit temporarily so that the * hci_update_random_address knows that it's safe to go ahead * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + clear_bit(HCI_LE_ADV, &hdev->dev_flags); connectable = get_connectable(hdev); @@ -858,8 +1086,8 @@ static void enable_advertising(struct hci_request *req) return; memset(&cp, 0, sizeof(cp)); - cp.min_interval = cpu_to_le16(0x0800); - cp.max_interval = cpu_to_le16(0x0800); + cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); + cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); cp.type = connectable ? LE_ADV_IND : LE_ADV_NONCONN_IND; cp.own_address_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; @@ -869,13 +1097,6 @@ static void enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -static void disable_advertising(struct hci_request *req) -{ - u8 enable = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -907,19 +1128,14 @@ static void rpa_expired(struct work_struct *work) set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); - if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags) || - hci_conn_num(hdev, LE_LINK) > 0) + if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags)) return; /* The generation of a new RPA and programming it into the * controller happens in the enable_advertising() function. */ - hci_req_init(&req, hdev); - - disable_advertising(&req); enable_advertising(&req); - hci_req_run(&req, NULL); } @@ -936,7 +1152,7 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) * for mgmt we require user-space to explicitly enable * it */ - clear_bit(HCI_PAIRABLE, &hdev->dev_flags); + clear_bit(HCI_BONDABLE, &hdev->dev_flags); } static int read_controller_info(struct sock *sk, struct hci_dev *hdev, @@ -982,7 +1198,7 @@ static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, { struct pending_cmd *cmd; - cmd = kmalloc(sizeof(*cmd), GFP_KERNEL); + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return NULL; @@ -1045,10 +1261,54 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status) } } +static bool hci_stop_discovery(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_remote_name_req_cancel cp; + struct inquiry_entry *e; + + switch (hdev->discovery.state) { + case DISCOVERY_FINDING: + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); + } else { + cancel_delayed_work(&hdev->le_scan_disable); + hci_req_add_le_scan_disable(req); + } + + return true; + + case DISCOVERY_RESOLVING: + e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, + NAME_PENDING); + if (!e) + break; + + bacpy(&cp.bdaddr, &e->data.bdaddr); + hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), + &cp); + + return true; + + default: + /* Passive scanning */ + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { + hci_req_add_le_scan_disable(req); + return true; + } + + break; + } + + return false; +} + static int clean_up_hci_state(struct hci_dev *hdev) { struct hci_request req; struct hci_conn *conn; + bool discov_stopped; + int err; hci_req_init(&req, hdev); @@ -1058,12 +1318,10 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) disable_advertising(&req); - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { - hci_req_add_le_scan_disable(&req); - } + discov_stopped = hci_stop_discovery(&req); list_for_each_entry(conn, &hdev->conn_hash.list, list) { struct hci_cp_disconnect dc; @@ -1097,7 +1355,11 @@ static int clean_up_hci_state(struct hci_dev *hdev) } } - return hci_req_run(&req, clean_up_hci_complete); + err = hci_req_run(&req, clean_up_hci_complete); + if (!err && discov_stopped) + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + + return err; } static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, @@ -1166,36 +1428,6 @@ failed: return err; } -static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len, - struct sock *skip_sk) -{ - struct sk_buff *skb; - struct mgmt_hdr *hdr; - - skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(event); - if (hdev) - hdr->index = cpu_to_le16(hdev->id); - else - hdr->index = cpu_to_le16(MGMT_INDEX_NONE); - hdr->len = cpu_to_le16(data_len); - - if (data) - memcpy(skb_put(skb, data_len), data, data_len); - - /* Time stamp */ - __net_timestamp(skb); - - hci_send_to_control(skb, skip_sk); - kfree_skb(skb); - - return 0; -} - static int new_settings(struct hci_dev *hdev, struct sock *skip) { __le32 ev; @@ -1205,6 +1437,11 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip) return mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), skip); } +int mgmt_new_settings(struct hci_dev *hdev) +{ + return new_settings(hdev, NULL); +} + struct cmd_lookup { struct sock *sk; struct hci_dev *hdev; @@ -1516,7 +1753,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status) { struct pending_cmd *cmd; struct mgmt_mode *cp; - bool changed; + bool conn_changed, discov_changed; BT_DBG("status 0x%02x", status); @@ -1533,15 +1770,25 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status) } cp = cmd->param; - if (cp->val) - changed = !test_and_set_bit(HCI_CONNECTABLE, &hdev->dev_flags); - else - changed = test_and_clear_bit(HCI_CONNECTABLE, &hdev->dev_flags); + if (cp->val) { + conn_changed = !test_and_set_bit(HCI_CONNECTABLE, + &hdev->dev_flags); + discov_changed = false; + } else { + conn_changed = test_and_clear_bit(HCI_CONNECTABLE, + &hdev->dev_flags); + discov_changed = test_and_clear_bit(HCI_DISCOVERABLE, + &hdev->dev_flags); + } send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); - if (changed) + if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); + if (discov_changed) + mgmt_update_adv_data(hdev); + hci_update_background_scan(hdev); + } remove_cmd: mgmt_pending_remove(cmd); @@ -1570,8 +1817,10 @@ static int set_connectable_update_settings(struct hci_dev *hdev, if (err < 0) return err; - if (changed) + if (changed) { + hci_update_background_scan(hdev); return new_settings(hdev, sk); + } return 0; } @@ -1632,7 +1881,18 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->val) { scan = SCAN_PAGE; } else { - scan = 0; + /* If we don't have any whitelist entries just + * disable all scanning. If there are entries + * and we had both page and inquiry scanning + * enabled then fall back to only page scanning. + * Otherwise no changes are needed. + */ + if (list_empty(&hdev->whitelist)) + scan = SCAN_DISABLED; + else if (test_bit(HCI_ISCAN, &hdev->flags)) + scan = SCAN_PAGE; + else + goto no_scan_update; if (test_bit(HCI_ISCAN, &hdev->flags) && hdev->discov_timeout > 0) @@ -1642,6 +1902,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } +no_scan_update: /* If we're going from non-connectable to connectable or * vice-versa when fast connectable is enabled ensure that fast * connectable gets disabled. write_fast_connectable won't do @@ -1651,11 +1912,9 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) write_fast_connectable(&req, false); - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags) && - hci_conn_num(hdev, LE_LINK) == 0) { - disable_advertising(&req); + /* Update the advertising parameters if necessary */ + if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) enable_advertising(&req); - } err = hci_req_run(&req, set_connectable_complete); if (err < 0) { @@ -1671,7 +1930,7 @@ failed: return err; } -static int set_pairable(struct sock *sk, struct hci_dev *hdev, void *data, +static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; @@ -1681,17 +1940,17 @@ static int set_pairable(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("request for %s", hdev->name); if (cp->val != 0x00 && cp->val != 0x01) - return cmd_status(sk, hdev->id, MGMT_OP_SET_PAIRABLE, + return cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val) - changed = !test_and_set_bit(HCI_PAIRABLE, &hdev->dev_flags); + changed = !test_and_set_bit(HCI_BONDABLE, &hdev->dev_flags); else - changed = test_and_clear_bit(HCI_PAIRABLE, &hdev->dev_flags); + changed = test_and_clear_bit(HCI_BONDABLE, &hdev->dev_flags); - err = send_settings_rsp(sk, MGMT_OP_SET_PAIRABLE, hdev); + err = send_settings_rsp(sk, MGMT_OP_SET_BONDABLE, hdev); if (err < 0) goto unlock; @@ -1840,6 +2099,10 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } + if (!cp->val && test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, + sizeof(cp->val), &cp->val); + err = hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &cp->val); if (err < 0) { mgmt_pending_remove(cmd); @@ -1936,6 +2199,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status) update_scan_rsp_data(&req); hci_req_run(&req, NULL); + hci_update_background_scan(hdev); + hci_dev_unlock(hdev); } } @@ -2011,9 +2276,9 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) if (val) { hci_cp.le = val; - hci_cp.simul = lmp_le_br_capable(hdev); + hci_cp.simul = 0x00; } else { - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) + if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) disable_advertising(&req); } @@ -2336,6 +2601,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_load_link_keys *cp = data; + const u16 max_key_count = ((U16_MAX - sizeof(*cp)) / + sizeof(struct mgmt_link_key_info)); u16 key_count, expected_len; bool changed; int i; @@ -2347,6 +2614,12 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_NOT_SUPPORTED); key_count = __le16_to_cpu(cp->key_count); + if (key_count > max_key_count) { + BT_ERR("load_link_keys: too big key_count value %u", + key_count); + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_link_key_info); @@ -2377,9 +2650,11 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, hci_link_keys_clear(hdev); if (cp->debug_keys) - changed = !test_and_set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags); + changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS, + &hdev->dev_flags); else - changed = test_and_clear_bit(HCI_DEBUG_KEYS, &hdev->dev_flags); + changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS, + &hdev->dev_flags); if (changed) new_settings(hdev, NULL); @@ -2387,8 +2662,14 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; - hci_add_link_key(hdev, NULL, 0, &key->addr.bdaddr, key->val, - key->type, key->pin_len); + /* Always ignore debug keys and require a new pairing if + * the user wants to use them. + */ + if (key->type == HCI_LK_DEBUG_COMBINATION) + continue; + + hci_add_link_key(hdev, NULL, &key->addr.bdaddr, key->val, + key->type, key->pin_len, NULL); } cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, 0, NULL, 0); @@ -2729,6 +3010,10 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); + if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) + return cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS, NULL, 0); + hci_dev_lock(hdev); hdev->io_capability = cp->io_capability; @@ -2841,6 +3126,11 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_INVALID_PARAMS, &rp, sizeof(rp)); + if (cp->io_cap > SMP_IO_KEYBOARD_DISPLAY) + return cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { @@ -2850,10 +3140,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, } sec_level = BT_SECURITY_MEDIUM; - if (cp->io_cap == 0x03) - auth_type = HCI_AT_DEDICATED_BONDING; - else - auth_type = HCI_AT_DEDICATED_BONDING_MITM; + auth_type = HCI_AT_DEDICATED_BONDING; if (cp->addr.type == BDADDR_BREDR) { conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level, @@ -2868,8 +3155,20 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, else addr_type = ADDR_LE_DEV_RANDOM; + /* When pairing a new device, it is expected to remember + * this device for future connections. Adding the connection + * parameter information ahead of time allows tracking + * of the slave preferred values and will speed up any + * further connection establishment. + * + * If connection parameters already exist, then they + * will be kept and this function does nothing. + */ + hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + conn = hci_connect_le(hdev, &cp->addr.bdaddr, addr_type, - sec_level, auth_type); + sec_level, HCI_LE_CONN_TIMEOUT, + HCI_ROLE_MASTER); } if (IS_ERR(conn)) { @@ -2914,8 +3213,8 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn->io_capability = cp->io_cap; cmd->user_data = conn; - if (conn->state == BT_CONNECTED && - hci_conn_security(conn, sec_level, auth_type)) + if ((conn->state == BT_CONNECTED || conn->state == BT_CONFIG) && + hci_conn_security(conn, sec_level, auth_type, true)) pairing_complete(cmd, 0); err = 0; @@ -2997,9 +3296,7 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, } if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) { - /* Continue with pairing via SMP */ err = smp_user_confirm_reply(conn, mgmt_op, passkey); - if (!err) err = cmd_complete(sk, hdev->id, mgmt_op, MGMT_STATUS_SUCCESS, addr, @@ -3351,6 +3648,8 @@ static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) static void start_discovery_complete(struct hci_dev *hdev, u8 status) { + unsigned long timeout = 0; + BT_DBG("status %d", status); if (status) { @@ -3366,13 +3665,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) switch (hdev->discovery.type) { case DISCOV_TYPE_LE: - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - DISCOV_LE_TIMEOUT); + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); break; case DISCOV_TYPE_INTERLEAVED: - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - DISCOV_INTERLEAVED_TIMEOUT); + timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); break; case DISCOV_TYPE_BREDR: @@ -3381,6 +3678,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) default: BT_ERR("Invalid discovery type %d", hdev->discovery.type); } + + if (!timeout) + return; + + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout); } static int start_discovery(struct sock *sk, struct hci_dev *hdev, @@ -3472,11 +3774,21 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { - err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_REJECTED); - mgmt_pending_remove(cmd); - goto failed; + if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) { + /* Don't let discovery abort an outgoing + * connection attempt that's using directed + * advertising. + */ + if (hci_conn_hash_lookup_state(hdev, LE_LINK, + BT_CONNECT)) { + err = cmd_status(sk, hdev->id, + MGMT_OP_START_DISCOVERY, + MGMT_STATUS_REJECTED); + mgmt_pending_remove(cmd); + goto failed; + } + + disable_advertising(&req); } /* If controller is scanning, it means the background scanning @@ -3570,8 +3882,6 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_stop_discovery *mgmt_cp = data; struct pending_cmd *cmd; - struct hci_cp_remote_name_req_cancel cp; - struct inquiry_entry *e; struct hci_request req; int err; @@ -3601,52 +3911,22 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_init(&req, hdev); - switch (hdev->discovery.state) { - case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) { - hci_req_add(&req, HCI_OP_INQUIRY_CANCEL, 0, NULL); - } else { - cancel_delayed_work(&hdev->le_scan_disable); - - hci_req_add_le_scan_disable(&req); - } - - break; + hci_stop_discovery(&req); - case DISCOVERY_RESOLVING: - e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, - NAME_PENDING); - if (!e) { - mgmt_pending_remove(cmd); - err = cmd_complete(sk, hdev->id, - MGMT_OP_STOP_DISCOVERY, 0, - &mgmt_cp->type, - sizeof(mgmt_cp->type)); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - goto unlock; - } - - bacpy(&cp.bdaddr, &e->data.bdaddr); - hci_req_add(&req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), - &cp); - - break; - - default: - BT_DBG("unknown discovery state %u", hdev->discovery.state); - - mgmt_pending_remove(cmd); - err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, - MGMT_STATUS_FAILED, &mgmt_cp->type, - sizeof(mgmt_cp->type)); + err = hci_req_run(&req, stop_discovery_complete); + if (!err) { + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); goto unlock; } - err = hci_req_run(&req, stop_discovery_complete); - if (err < 0) - mgmt_pending_remove(cmd); - else - hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + mgmt_pending_remove(cmd); + + /* If no HCI commands were sent we're done */ + if (err == -ENODATA) { + err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0, + &mgmt_cp->type, sizeof(mgmt_cp->type)); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + } unlock: hci_dev_unlock(hdev); @@ -3711,12 +3991,18 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - err = hci_blacklist_add(hdev, &cp->addr.bdaddr, cp->addr.type); - if (err < 0) + err = hci_bdaddr_list_add(&hdev->blacklist, &cp->addr.bdaddr, + cp->addr.type); + if (err < 0) { status = MGMT_STATUS_FAILED; - else - status = MGMT_STATUS_SUCCESS; + goto done; + } + mgmt_event(MGMT_EV_DEVICE_BLOCKED, hdev, &cp->addr, sizeof(cp->addr), + sk); + status = MGMT_STATUS_SUCCESS; + +done: err = cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -3741,12 +4027,18 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - err = hci_blacklist_del(hdev, &cp->addr.bdaddr, cp->addr.type); - if (err < 0) + err = hci_bdaddr_list_del(&hdev->blacklist, &cp->addr.bdaddr, + cp->addr.type); + if (err < 0) { status = MGMT_STATUS_INVALID_PARAMS; - else - status = MGMT_STATUS_SUCCESS; + goto done; + } + + mgmt_event(MGMT_EV_DEVICE_UNBLOCKED, hdev, &cp->addr, sizeof(cp->addr), + sk); + status = MGMT_STATUS_SUCCESS; +done: err = cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE, status, &cp->addr, sizeof(cp->addr)); @@ -3801,6 +4093,11 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status) return; } + if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) + set_bit(HCI_ADVERTISING, &hdev->dev_flags); + else + clear_bit(HCI_ADVERTISING, &hdev->dev_flags); + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, &match); @@ -3841,7 +4138,9 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, * necessary). */ if (!hdev_is_powered(hdev) || val == enabled || - hci_conn_num(hdev, LE_LINK) > 0) { + hci_conn_num(hdev, LE_LINK) > 0 || + (test_bit(HCI_LE_SCAN, &hdev->dev_flags) && + hdev->le_scan_type == LE_SCAN_ACTIVE)) { bool changed = false; if (val != test_bit(HCI_ADVERTISING, &hdev->dev_flags)) { @@ -4093,7 +4392,8 @@ static void set_bredr_scan(struct hci_request *req) */ write_fast_connectable(req, false); - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || + !list_empty(&hdev->whitelist)) scan |= SCAN_PAGE; if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) scan |= SCAN_INQUIRY; @@ -4207,7 +4507,8 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || + !list_empty(&hdev->whitelist)) set_bredr_scan(&req); /* Since only the advertising data flags will change, there @@ -4240,7 +4541,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, status); if (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_SC, &hdev->dev_flags)) + !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_NOT_SUPPORTED); @@ -4316,21 +4617,37 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; - bool changed; + bool changed, use_changed; int err; BT_DBG("request for %s", hdev->name); - if (cp->val != 0x00 && cp->val != 0x01) + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); if (cp->val) - changed = !test_and_set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags); + changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS, + &hdev->dev_flags); + else + changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS, + &hdev->dev_flags); + + if (cp->val == 0x02) + use_changed = !test_and_set_bit(HCI_USE_DEBUG_KEYS, + &hdev->dev_flags); else - changed = test_and_clear_bit(HCI_DEBUG_KEYS, &hdev->dev_flags); + use_changed = test_and_clear_bit(HCI_USE_DEBUG_KEYS, + &hdev->dev_flags); + + if (hdev_is_powered(hdev) && use_changed && + test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + u8 mode = (cp->val == 0x02) ? 0x01 : 0x00; + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, + sizeof(mode), &mode); + } err = send_settings_rsp(sk, MGMT_OP_SET_DEBUG_KEYS, hdev); if (err < 0) @@ -4414,6 +4731,8 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_load_irks *cp = cp_data; + const u16 max_irk_count = ((U16_MAX - sizeof(*cp)) / + sizeof(struct mgmt_irk_info)); u16 irk_count, expected_len; int i, err; @@ -4424,6 +4743,11 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, MGMT_STATUS_NOT_SUPPORTED); irk_count = __le16_to_cpu(cp->irk_count); + if (irk_count > max_irk_count) { + BT_ERR("load_irks: too big irk_count value %u", irk_count); + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, + MGMT_STATUS_INVALID_PARAMS); + } expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info); if (expected_len != len) { @@ -4493,6 +4817,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, void *cp_data, u16 len) { struct mgmt_cp_load_long_term_keys *cp = cp_data; + const u16 max_key_count = ((U16_MAX - sizeof(*cp)) / + sizeof(struct mgmt_ltk_info)); u16 key_count, expected_len; int i, err; @@ -4503,6 +4829,11 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_NOT_SUPPORTED); key_count = __le16_to_cpu(cp->key_count); + if (key_count > max_key_count) { + BT_ERR("load_ltks: too big key_count value %u", key_count); + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_INVALID_PARAMS); + } expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_ltk_info); @@ -4530,7 +4861,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; - u8 type, addr_type; + u8 type, addr_type, authenticated; if (key->addr.type == BDADDR_LE_PUBLIC) addr_type = ADDR_LE_DEV_PUBLIC; @@ -4538,12 +4869,23 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, addr_type = ADDR_LE_DEV_RANDOM; if (key->master) - type = HCI_SMP_LTK; + type = SMP_LTK; else - type = HCI_SMP_LTK_SLAVE; + type = SMP_LTK_SLAVE; + + switch (key->type) { + case MGMT_LTK_UNAUTHENTICATED: + authenticated = 0x00; + break; + case MGMT_LTK_AUTHENTICATED: + authenticated = 0x01; + break; + default: + continue; + } hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type, - key->type, key->val, key->enc_size, key->ediv, + authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -4555,6 +4897,773 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } +struct cmd_conn_lookup { + struct hci_conn *conn; + bool valid_tx_power; + u8 mgmt_status; +}; + +static void get_conn_info_complete(struct pending_cmd *cmd, void *data) +{ + struct cmd_conn_lookup *match = data; + struct mgmt_cp_get_conn_info *cp; + struct mgmt_rp_get_conn_info rp; + struct hci_conn *conn = cmd->user_data; + + if (conn != match->conn) + return; + + cp = (struct mgmt_cp_get_conn_info *) cmd->param; + + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!match->mgmt_status) { + rp.rssi = conn->rssi; + + if (match->valid_tx_power) { + rp.tx_power = conn->tx_power; + rp.max_tx_power = conn->max_tx_power; + } else { + rp.tx_power = HCI_TX_POWER_INVALID; + rp.max_tx_power = HCI_TX_POWER_INVALID; + } + } + + cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, + match->mgmt_status, &rp, sizeof(rp)); + + hci_conn_drop(conn); + + mgmt_pending_remove(cmd); +} + +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 status) +{ + struct hci_cp_read_rssi *cp; + struct hci_conn *conn; + struct cmd_conn_lookup match; + u16 handle; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + /* TX power data is valid in case request completed successfully, + * otherwise we assume it's not valid. At the moment we assume that + * either both or none of current and max values are valid to keep code + * simple. + */ + match.valid_tx_power = !status; + + /* Commands sent in request are either Read RSSI or Read Transmit Power + * Level so we check which one was last sent to retrieve connection + * handle. Both commands have handle as first parameter so it's safe to + * cast data on the same command struct. + * + * First command sent is always Read RSSI and we fail only if it fails. + * In other case we simply override error to indicate success as we + * already remembered if TX power value is actually valid. + */ + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_RSSI); + if (!cp) { + cp = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER); + status = 0; + } + + if (!cp) { + BT_ERR("invalid sent_cmd in response"); + goto unlock; + } + + handle = __le16_to_cpu(cp->handle); + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) { + BT_ERR("unknown handle (%d) in response", handle); + goto unlock; + } + + match.conn = conn; + match.mgmt_status = mgmt_status(status); + + /* Cache refresh is complete, now reply for mgmt request for given + * connection only. + */ + mgmt_pending_foreach(MGMT_OP_GET_CONN_INFO, hdev, + get_conn_info_complete, &match); + +unlock: + hci_dev_unlock(hdev); +} + +static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_get_conn_info *cp = data; + struct mgmt_rp_get_conn_info rp; + struct hci_conn *conn; + unsigned long conn_info_age; + int err = 0; + + BT_DBG("%s", hdev->name); + + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (!bdaddr_type_is_valid(cp->addr.type)) + return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + hci_dev_lock(hdev); + + if (!hdev_is_powered(hdev)) { + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + goto unlock; + } + + if (cp->addr.type == BDADDR_BREDR) + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, + &cp->addr.bdaddr); + else + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); + + if (!conn || conn->state != BT_CONNECTED) { + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp)); + goto unlock; + } + + /* To avoid client trying to guess when to poll again for information we + * calculate conn info age as random value between min/max set in hdev. + */ + conn_info_age = hdev->conn_info_min_age + + prandom_u32_max(hdev->conn_info_max_age - + hdev->conn_info_min_age); + + /* Query controller to refresh cached values if they are too old or were + * never read. + */ + if (time_after(jiffies, conn->conn_info_timestamp + + msecs_to_jiffies(conn_info_age)) || + !conn->conn_info_timestamp) { + struct hci_request req; + struct hci_cp_read_tx_power req_txp_cp; + struct hci_cp_read_rssi req_rssi_cp; + struct pending_cmd *cmd; + + hci_req_init(&req, hdev); + req_rssi_cp.handle = cpu_to_le16(conn->handle); + hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(req_rssi_cp), + &req_rssi_cp); + + /* For LE links TX power does not change thus we don't need to + * query for it once value is known. + */ + if (!bdaddr_type_is_le(cp->addr.type) || + conn->tx_power == HCI_TX_POWER_INVALID) { + req_txp_cp.handle = cpu_to_le16(conn->handle); + req_txp_cp.type = 0x00; + hci_req_add(&req, HCI_OP_READ_TX_POWER, + sizeof(req_txp_cp), &req_txp_cp); + } + + /* Max TX power needs to be read only once per connection */ + if (conn->max_tx_power == HCI_TX_POWER_INVALID) { + req_txp_cp.handle = cpu_to_le16(conn->handle); + req_txp_cp.type = 0x01; + hci_req_add(&req, HCI_OP_READ_TX_POWER, + sizeof(req_txp_cp), &req_txp_cp); + } + + err = hci_req_run(&req, conn_info_refresh_complete); + if (err < 0) + goto unlock; + + cmd = mgmt_pending_add(sk, MGMT_OP_GET_CONN_INFO, hdev, + data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_conn_hold(conn); + cmd->user_data = conn; + + conn->conn_info_timestamp = jiffies; + } else { + /* Cache is valid, just reply with values cached in hci_conn */ + rp.rssi = conn->rssi; + rp.tx_power = conn->tx_power; + rp.max_tx_power = conn->max_tx_power; + + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + } + +unlock: + hci_dev_unlock(hdev); + return err; +} + +static void get_clock_info_complete(struct hci_dev *hdev, u8 status) +{ + struct mgmt_cp_get_clock_info *cp; + struct mgmt_rp_get_clock_info rp; + struct hci_cp_read_clock *hci_cp; + struct pending_cmd *cmd; + struct hci_conn *conn; + + BT_DBG("%s status %u", hdev->name, status); + + hci_dev_lock(hdev); + + hci_cp = hci_sent_cmd_data(hdev, HCI_OP_READ_CLOCK); + if (!hci_cp) + goto unlock; + + if (hci_cp->which) { + u16 handle = __le16_to_cpu(hci_cp->handle); + conn = hci_conn_hash_lookup_handle(hdev, handle); + } else { + conn = NULL; + } + + cmd = mgmt_pending_find_data(MGMT_OP_GET_CLOCK_INFO, hdev, conn); + if (!cmd) + goto unlock; + + cp = cmd->param; + + memset(&rp, 0, sizeof(rp)); + memcpy(&rp.addr, &cp->addr, sizeof(rp.addr)); + + if (status) + goto send_rsp; + + rp.local_clock = cpu_to_le32(hdev->clock); + + if (conn) { + rp.piconet_clock = cpu_to_le32(conn->clock); + rp.accuracy = cpu_to_le16(conn->clock_accuracy); + } + +send_rsp: + cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), + &rp, sizeof(rp)); + mgmt_pending_remove(cmd); + if (conn) + hci_conn_drop(conn); + +unlock: + hci_dev_unlock(hdev); +} + +static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_get_clock_info *cp = data; + struct mgmt_rp_get_clock_info rp; + struct hci_cp_read_clock hci_cp; + struct pending_cmd *cmd; + struct hci_request req; + struct hci_conn *conn; + int err; + + BT_DBG("%s", hdev->name); + + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + + if (cp->addr.type != BDADDR_BREDR) + return cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_INVALID_PARAMS, + &rp, sizeof(rp)); + + hci_dev_lock(hdev); + + if (!hdev_is_powered(hdev)) { + err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp)); + goto unlock; + } + + if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, + &cp->addr.bdaddr); + if (!conn || conn->state != BT_CONNECTED) { + err = cmd_complete(sk, hdev->id, + MGMT_OP_GET_CLOCK_INFO, + MGMT_STATUS_NOT_CONNECTED, + &rp, sizeof(rp)); + goto unlock; + } + } else { + conn = NULL; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_GET_CLOCK_INFO, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_req_init(&req, hdev); + + memset(&hci_cp, 0, sizeof(hci_cp)); + hci_req_add(&req, HCI_OP_READ_CLOCK, sizeof(hci_cp), &hci_cp); + + if (conn) { + hci_conn_hold(conn); + cmd->user_data = conn; + + hci_cp.handle = cpu_to_le16(conn->handle); + hci_cp.which = 0x01; /* Piconet clock */ + hci_req_add(&req, HCI_OP_READ_CLOCK, sizeof(hci_cp), &hci_cp); + } + + err = hci_req_run(&req, get_clock_info_complete); + if (err < 0) + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); + return err; +} + +/* Helper for Add/Remove Device commands */ +static void update_page_scan(struct hci_dev *hdev, u8 scan) +{ + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + return; + + if (!hdev_is_powered(hdev)) + return; + + /* If HCI_CONNECTABLE is set then Add/Remove Device should not + * make any changes to page scanning. + */ + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) + return; + + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + scan |= SCAN_INQUIRY; + + hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +static void device_added(struct sock *sk, struct hci_dev *hdev, + bdaddr_t *bdaddr, u8 type, u8 action) +{ + struct mgmt_ev_device_added ev; + + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = type; + ev.action = action; + + mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); +} + +static int add_device(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_add_device *cp = data; + u8 auto_conn, addr_type; + int err; + + BT_DBG("%s", hdev->name); + + if (!bdaddr_type_is_valid(cp->addr.type) || + !bacmp(&cp->addr.bdaddr, BDADDR_ANY)) + return cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + + if (cp->action != 0x00 && cp->action != 0x01 && cp->action != 0x02) + return cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + + hci_dev_lock(hdev); + + if (cp->addr.type == BDADDR_BREDR) { + bool update_scan; + + /* Only incoming connections action is supported for now */ + if (cp->action != 0x01) { + err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + goto unlock; + } + + update_scan = list_empty(&hdev->whitelist); + + err = hci_bdaddr_list_add(&hdev->whitelist, &cp->addr.bdaddr, + cp->addr.type); + if (err) + goto unlock; + + if (update_scan) + update_page_scan(hdev, SCAN_PAGE); + + goto added; + } + + if (cp->addr.type == BDADDR_LE_PUBLIC) + addr_type = ADDR_LE_DEV_PUBLIC; + else + addr_type = ADDR_LE_DEV_RANDOM; + + if (cp->action == 0x02) + auto_conn = HCI_AUTO_CONN_ALWAYS; + else if (cp->action == 0x01) + auto_conn = HCI_AUTO_CONN_DIRECT; + else + auto_conn = HCI_AUTO_CONN_REPORT; + + /* If the connection parameters don't exist for this device, + * they will be created and configured with defaults. + */ + if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, + auto_conn) < 0) { + err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_FAILED, + &cp->addr, sizeof(cp->addr)); + goto unlock; + } + +added: + device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); + + err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + +unlock: + hci_dev_unlock(hdev); + return err; +} + +static void device_removed(struct sock *sk, struct hci_dev *hdev, + bdaddr_t *bdaddr, u8 type) +{ + struct mgmt_ev_device_removed ev; + + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = type; + + mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); +} + +static int remove_device(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_remove_device *cp = data; + int err; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { + struct hci_conn_params *params; + u8 addr_type; + + if (!bdaddr_type_is_valid(cp->addr.type)) { + err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + goto unlock; + } + + if (cp->addr.type == BDADDR_BREDR) { + err = hci_bdaddr_list_del(&hdev->whitelist, + &cp->addr.bdaddr, + cp->addr.type); + if (err) { + err = cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + goto unlock; + } + + if (list_empty(&hdev->whitelist)) + update_page_scan(hdev, SCAN_DISABLED); + + device_removed(sk, hdev, &cp->addr.bdaddr, + cp->addr.type); + goto complete; + } + + if (cp->addr.type == BDADDR_LE_PUBLIC) + addr_type = ADDR_LE_DEV_PUBLIC; + else + addr_type = ADDR_LE_DEV_RANDOM; + + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + addr_type); + if (!params) { + err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + goto unlock; + } + + if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { + err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + goto unlock; + } + + list_del(¶ms->action); + list_del(¶ms->list); + kfree(params); + hci_update_background_scan(hdev); + + device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); + } else { + struct hci_conn_params *p, *tmp; + struct bdaddr_list *b, *btmp; + + if (cp->addr.type) { + err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); + goto unlock; + } + + list_for_each_entry_safe(b, btmp, &hdev->whitelist, list) { + device_removed(sk, hdev, &b->bdaddr, b->bdaddr_type); + list_del(&b->list); + kfree(b); + } + + update_page_scan(hdev, SCAN_DISABLED); + + list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { + if (p->auto_connect == HCI_AUTO_CONN_DISABLED) + continue; + device_removed(sk, hdev, &p->addr, p->addr_type); + list_del(&p->action); + list_del(&p->list); + kfree(p); + } + + BT_DBG("All LE connection parameters were removed"); + + hci_update_background_scan(hdev); + } + +complete: + err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + +unlock: + hci_dev_unlock(hdev); + return err; +} + +static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_load_conn_param *cp = data; + const u16 max_param_count = ((U16_MAX - sizeof(*cp)) / + sizeof(struct mgmt_conn_param)); + u16 param_count, expected_len; + int i; + + if (!lmp_le_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_NOT_SUPPORTED); + + param_count = __le16_to_cpu(cp->param_count); + if (param_count > max_param_count) { + BT_ERR("load_conn_param: too big param_count value %u", + param_count); + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_INVALID_PARAMS); + } + + expected_len = sizeof(*cp) + param_count * + sizeof(struct mgmt_conn_param); + if (expected_len != len) { + BT_ERR("load_conn_param: expected %u bytes, got %u bytes", + expected_len, len); + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, + MGMT_STATUS_INVALID_PARAMS); + } + + BT_DBG("%s param_count %u", hdev->name, param_count); + + hci_dev_lock(hdev); + + hci_conn_params_clear_disabled(hdev); + + for (i = 0; i < param_count; i++) { + struct mgmt_conn_param *param = &cp->params[i]; + struct hci_conn_params *hci_param; + u16 min, max, latency, timeout; + u8 addr_type; + + BT_DBG("Adding %pMR (type %u)", ¶m->addr.bdaddr, + param->addr.type); + + if (param->addr.type == BDADDR_LE_PUBLIC) { + addr_type = ADDR_LE_DEV_PUBLIC; + } else if (param->addr.type == BDADDR_LE_RANDOM) { + addr_type = ADDR_LE_DEV_RANDOM; + } else { + BT_ERR("Ignoring invalid connection parameters"); + continue; + } + + min = le16_to_cpu(param->min_interval); + max = le16_to_cpu(param->max_interval); + latency = le16_to_cpu(param->latency); + timeout = le16_to_cpu(param->timeout); + + BT_DBG("min 0x%04x max 0x%04x latency 0x%04x timeout 0x%04x", + min, max, latency, timeout); + + if (hci_check_conn_params(min, max, latency, timeout) < 0) { + BT_ERR("Ignoring invalid connection parameters"); + continue; + } + + hci_param = hci_conn_params_add(hdev, ¶m->addr.bdaddr, + addr_type); + if (!hci_param) { + BT_ERR("Failed to add connection parameters"); + continue; + } + + hci_param->conn_min_interval = min; + hci_param->conn_max_interval = max; + hci_param->conn_latency = latency; + hci_param->supervision_timeout = timeout; + } + + hci_dev_unlock(hdev); + + return cmd_complete(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, 0, NULL, 0); +} + +static int set_external_config(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_set_external_config *cp = data; + bool changed; + int err; + + BT_DBG("%s", hdev->name); + + if (hdev_is_powered(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_REJECTED); + + if (cp->config != 0x00 && cp->config != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_INVALID_PARAMS); + + if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG, + MGMT_STATUS_NOT_SUPPORTED); + + hci_dev_lock(hdev); + + if (cp->config) + changed = !test_and_set_bit(HCI_EXT_CONFIGURED, + &hdev->dev_flags); + else + changed = test_and_clear_bit(HCI_EXT_CONFIGURED, + &hdev->dev_flags); + + err = send_options_rsp(sk, MGMT_OP_SET_EXTERNAL_CONFIG, hdev); + if (err < 0) + goto unlock; + + if (!changed) + goto unlock; + + err = new_options(hdev, sk); + + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) == is_configured(hdev)) { + mgmt_index_removed(hdev); + + if (test_and_change_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + set_bit(HCI_CONFIG, &hdev->dev_flags); + set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + + queue_work(hdev->req_workqueue, &hdev->power_on); + } else { + set_bit(HCI_RAW, &hdev->flags); + mgmt_index_added(hdev); + } + } + +unlock: + hci_dev_unlock(hdev); + return err; +} + +static int set_public_address(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_set_public_address *cp = data; + bool changed; + int err; + + BT_DBG("%s", hdev->name); + + if (hdev_is_powered(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_REJECTED); + + if (!bacmp(&cp->bdaddr, BDADDR_ANY)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); + + if (!hdev->set_bdaddr) + return cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS, + MGMT_STATUS_NOT_SUPPORTED); + + hci_dev_lock(hdev); + + changed = !!bacmp(&hdev->public_addr, &cp->bdaddr); + bacpy(&hdev->public_addr, &cp->bdaddr); + + err = send_options_rsp(sk, MGMT_OP_SET_PUBLIC_ADDRESS, hdev); + if (err < 0) + goto unlock; + + if (!changed) + goto unlock; + + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + err = new_options(hdev, sk); + + if (is_configured(hdev)) { + mgmt_index_removed(hdev); + + clear_bit(HCI_UNCONFIGURED, &hdev->dev_flags); + + set_bit(HCI_CONFIG, &hdev->dev_flags); + set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + + queue_work(hdev->req_workqueue, &hdev->power_on); + } + +unlock: + hci_dev_unlock(hdev); + return err; +} + static const struct mgmt_handler { int (*func) (struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len); @@ -4570,7 +5679,7 @@ static const struct mgmt_handler { { set_discoverable, false, MGMT_SET_DISCOVERABLE_SIZE }, { set_connectable, false, MGMT_SETTING_SIZE }, { set_fast_connectable, false, MGMT_SETTING_SIZE }, - { set_pairable, false, MGMT_SETTING_SIZE }, + { set_bondable, false, MGMT_SETTING_SIZE }, { set_link_security, false, MGMT_SETTING_SIZE }, { set_ssp, false, MGMT_SETTING_SIZE }, { set_hs, false, MGMT_SETTING_SIZE }, @@ -4610,9 +5719,17 @@ static const struct mgmt_handler { { set_debug_keys, false, MGMT_SETTING_SIZE }, { set_privacy, false, MGMT_SET_PRIVACY_SIZE }, { load_irks, true, MGMT_LOAD_IRKS_SIZE }, + { get_conn_info, false, MGMT_GET_CONN_INFO_SIZE }, + { get_clock_info, false, MGMT_GET_CLOCK_INFO_SIZE }, + { add_device, false, MGMT_ADD_DEVICE_SIZE }, + { remove_device, false, MGMT_REMOVE_DEVICE_SIZE }, + { load_conn_param, true, MGMT_LOAD_CONN_PARAM_SIZE }, + { read_unconf_index_list, false, MGMT_READ_UNCONF_INDEX_LIST_SIZE }, + { read_config_info, false, MGMT_READ_CONFIG_INFO_SIZE }, + { set_external_config, false, MGMT_SET_EXTERNAL_CONFIG_SIZE }, + { set_public_address, false, MGMT_SET_PUBLIC_ADDRESS_SIZE }, }; - int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { void *buf; @@ -4656,11 +5773,21 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) } if (test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags) || test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { err = cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } + + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) && + opcode != MGMT_OP_READ_CONFIG_INFO && + opcode != MGMT_OP_SET_EXTERNAL_CONFIG && + opcode != MGMT_OP_SET_PUBLIC_ADDRESS) { + err = cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } } if (opcode >= ARRAY_SIZE(mgmt_handlers) || @@ -4671,8 +5798,15 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) goto done; } - if ((hdev && opcode < MGMT_OP_READ_INFO) || - (!hdev && opcode >= MGMT_OP_READ_INFO)) { + if (hdev && (opcode <= MGMT_OP_READ_INDEX_LIST || + opcode == MGMT_OP_READ_UNCONF_INDEX_LIST)) { + err = cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } + + if (!hdev && (opcode > MGMT_OP_READ_INDEX_LIST && + opcode != MGMT_OP_READ_UNCONF_INDEX_LIST)) { err = cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; @@ -4711,7 +5845,13 @@ void mgmt_index_added(struct hci_dev *hdev) if (hdev->dev_type != HCI_BREDR) return; - mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL); + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + return; + + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + mgmt_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, NULL); + else + mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL); } void mgmt_index_removed(struct hci_dev *hdev) @@ -4721,20 +5861,42 @@ void mgmt_index_removed(struct hci_dev *hdev) if (hdev->dev_type != HCI_BREDR) return; + if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) + return; + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); - mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL); + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) + mgmt_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, NULL); + else + mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL); } /* This function requires the caller holds hdev->lock */ -static void restart_le_auto_conns(struct hci_dev *hdev) +static void restart_le_actions(struct hci_dev *hdev) { struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { - if (p->auto_connect == HCI_AUTO_CONN_ALWAYS) - hci_pend_le_conn_add(hdev, &p->addr, p->addr_type); + /* Needed for AUTO_OFF case where might not "really" + * have been powered off. + */ + list_del_init(&p->action); + + switch (p->auto_connect) { + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + list_add(&p->action, &hdev->pend_le_conns); + break; + case HCI_AUTO_CONN_REPORT: + list_add(&p->action, &hdev->pend_le_reports); + break; + default: + break; + } } + + hci_update_background_scan(hdev); } static void powered_complete(struct hci_dev *hdev, u8 status) @@ -4745,7 +5907,7 @@ static void powered_complete(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); - restart_le_auto_conns(hdev); + restart_le_actions(hdev); mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -4775,8 +5937,8 @@ static int powered_update_hci(struct hci_dev *hdev) lmp_bredr_capable(hdev)) { struct hci_cp_write_le_host_supported cp; - cp.le = 1; - cp.simul = lmp_le_br_capable(hdev); + cp.le = 0x01; + cp.simul = 0x00; /* Check first if we already have the right * host state (host features set) @@ -4902,92 +6064,6 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev) hci_dev_unlock(hdev); } -void mgmt_discoverable(struct hci_dev *hdev, u8 discoverable) -{ - bool changed; - - /* Nothing needed here if there's a pending command since that - * commands request completion callback takes care of everything - * necessary. - */ - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev)) - return; - - /* Powering off may clear the scan mode - don't let that interfere */ - if (!discoverable && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) - return; - - if (discoverable) { - changed = !test_and_set_bit(HCI_DISCOVERABLE, &hdev->dev_flags); - } else { - clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags); - changed = test_and_clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); - } - - if (changed) { - struct hci_request req; - - /* In case this change in discoverable was triggered by - * a disabling of connectable there could be a need to - * update the advertising flags. - */ - hci_req_init(&req, hdev); - update_adv_data(&req); - hci_req_run(&req, NULL); - - new_settings(hdev, NULL); - } -} - -void mgmt_connectable(struct hci_dev *hdev, u8 connectable) -{ - bool changed; - - /* Nothing needed here if there's a pending command since that - * commands request completion callback takes care of everything - * necessary. - */ - if (mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) - return; - - /* Powering off may clear the scan mode - don't let that interfere */ - if (!connectable && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) - return; - - if (connectable) - changed = !test_and_set_bit(HCI_CONNECTABLE, &hdev->dev_flags); - else - changed = test_and_clear_bit(HCI_CONNECTABLE, &hdev->dev_flags); - - if (changed) - new_settings(hdev, NULL); -} - -void mgmt_advertising(struct hci_dev *hdev, u8 advertising) -{ - /* Powering off may stop advertising - don't let that interfere */ - if (!advertising && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) - return; - - if (advertising) - set_bit(HCI_ADVERTISING, &hdev->dev_flags); - else - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); -} - -void mgmt_write_scan_failed(struct hci_dev *hdev, u8 scan, u8 status) -{ - u8 mgmt_err = mgmt_status(status); - - if (scan & SCAN_PAGE) - mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, - cmd_status_rsp, &mgmt_err); - - if (scan & SCAN_INQUIRY) - mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, hdev, - cmd_status_rsp, &mgmt_err); -} - void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent) { @@ -5005,6 +6081,14 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL); } +static u8 mgmt_ltk_type(struct smp_ltk *ltk) +{ + if (ltk->authenticated) + return MGMT_LTK_AUTHENTICATED; + + return MGMT_LTK_UNAUTHENTICATED; +} + void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) { struct mgmt_ev_new_long_term_key ev; @@ -5030,12 +6114,12 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) bacpy(&ev.key.addr.bdaddr, &key->bdaddr); ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); - ev.key.type = key->authenticated; + ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; ev.key.rand = key->rand; - if (key->type == HCI_SMP_LTK) + if (key->type == SMP_LTK) ev.key.master = 1; memcpy(ev.key.val, key->val, sizeof(key->val)); @@ -5103,6 +6187,27 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, mgmt_event(MGMT_EV_NEW_CSRK, hdev, &ev, sizeof(ev), NULL); } +void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 bdaddr_type, u8 store_hint, u16 min_interval, + u16 max_interval, u16 latency, u16 timeout) +{ + struct mgmt_ev_new_conn_param ev; + + if (!hci_is_identity_address(bdaddr, bdaddr_type)) + return; + + memset(&ev, 0, sizeof(ev)); + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = link_to_bdaddr(LE_LINK, bdaddr_type); + ev.store_hint = store_hint; + ev.min_interval = cpu_to_le16(min_interval); + ev.max_interval = cpu_to_le16(max_interval); + ev.latency = cpu_to_le16(latency); + ev.timeout = cpu_to_le16(timeout); + + mgmt_event(MGMT_EV_NEW_CONN_PARAM, hdev, &ev, sizeof(ev), NULL); +} + static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) { @@ -5521,10 +6626,14 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_init(&req, hdev); - if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + if (test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) + hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE, + sizeof(enable), &enable); update_eir(&req); - else + } else { clear_eir(&req); + } hci_req_run(&req, NULL); } @@ -5668,37 +6777,36 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, } void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, - u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8 - ssp, u8 *eir, u16 eir_len) + u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, + u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { char buf[512]; struct mgmt_ev_device_found *ev = (void *) buf; - struct smp_irk *irk; size_t ev_size; - if (!hci_discovery_active(hdev)) - return; + /* Don't send events for a non-kernel initiated discovery. With + * LE one exception is if we have pend_le_reports > 0 in which + * case we're doing passive scanning and want these events. + */ + if (!hci_discovery_active(hdev)) { + if (link_type == ACL_LINK) + return; + if (link_type == LE_LINK && list_empty(&hdev->pend_le_reports)) + return; + } - /* Leave 5 bytes for a potential CoD field */ - if (sizeof(*ev) + eir_len + 5 > sizeof(buf)) + /* Make sure that the buffer is big enough. The 5 extra bytes + * are for the potential CoD field. + */ + if (sizeof(*ev) + eir_len + scan_rsp_len + 5 > sizeof(buf)) return; memset(buf, 0, sizeof(buf)); - irk = hci_get_irk(hdev, bdaddr, addr_type); - if (irk) { - bacpy(&ev->addr.bdaddr, &irk->bdaddr); - ev->addr.type = link_to_bdaddr(link_type, irk->addr_type); - } else { - bacpy(&ev->addr.bdaddr, bdaddr); - ev->addr.type = link_to_bdaddr(link_type, addr_type); - } - + bacpy(&ev->addr.bdaddr, bdaddr); + ev->addr.type = link_to_bdaddr(link_type, addr_type); ev->rssi = rssi; - if (cfm_name) - ev->flags |= cpu_to_le32(MGMT_DEV_FOUND_CONFIRM_NAME); - if (!ssp) - ev->flags |= cpu_to_le32(MGMT_DEV_FOUND_LEGACY_PAIRING); + ev->flags = cpu_to_le32(flags); if (eir_len > 0) memcpy(ev->eir, eir, eir_len); @@ -5707,8 +6815,11 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, dev_class, 3); - ev->eir_len = cpu_to_le16(eir_len); - ev_size = sizeof(*ev) + eir_len; + if (scan_rsp_len > 0) + memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len); + + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); + ev_size = sizeof(*ev) + eir_len + scan_rsp_len; mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, ev, ev_size, NULL); } @@ -5763,63 +6874,19 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -int mgmt_device_blocked(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) -{ - struct pending_cmd *cmd; - struct mgmt_ev_device_blocked ev; - - cmd = mgmt_pending_find(MGMT_OP_BLOCK_DEVICE, hdev); - - bacpy(&ev.addr.bdaddr, bdaddr); - ev.addr.type = type; - - return mgmt_event(MGMT_EV_DEVICE_BLOCKED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); -} - -int mgmt_device_unblocked(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) -{ - struct pending_cmd *cmd; - struct mgmt_ev_device_unblocked ev; - - cmd = mgmt_pending_find(MGMT_OP_UNBLOCK_DEVICE, hdev); - - bacpy(&ev.addr.bdaddr, bdaddr); - ev.addr.type = type; - - return mgmt_event(MGMT_EV_DEVICE_UNBLOCKED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); -} - static void adv_enable_complete(struct hci_dev *hdev, u8 status) { BT_DBG("%s status %u", hdev->name, status); - - /* Clear the advertising mgmt setting if we failed to re-enable it */ - if (status) { - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); - new_settings(hdev, NULL); - } } void mgmt_reenable_advertising(struct hci_dev *hdev) { struct hci_request req; - if (hci_conn_num(hdev, LE_LINK) > 0) - return; - if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags)) return; hci_req_init(&req, hdev); enable_advertising(&req); - - /* If this fails we have no option but to let user space know - * that we've disabled advertising. - */ - if (hci_req_run(&req, adv_enable_complete) < 0) { - clear_bit(HCI_ADVERTISING, &hdev->dev_flags); - new_settings(hdev, NULL); - } + hci_req_run(&req, adv_enable_complete); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index cf620260affa..af73bc3acb40 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -227,7 +227,8 @@ static int rfcomm_check_security(struct rfcomm_dlc *d) break; } - return hci_conn_security(conn->hcon, d->sec_level, auth_type); + return hci_conn_security(conn->hcon, d->sec_level, auth_type, + d->out); } static void rfcomm_session_timeout(unsigned long arg) @@ -307,7 +308,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d); skb_queue_head_init(&d->tx_queue); - spin_lock_init(&d->lock); + mutex_init(&d->lock); atomic_set(&d->refcnt, 1); rfcomm_dlc_clear_state(d); @@ -1909,10 +1910,13 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - if (!skb_linearize(skb)) + if (!skb_linearize(skb)) { s = rfcomm_recv_frame(s, skb); - else + if (!s) + break; + } else { kfree_skb(skb); + } } if (s && (sk->sk_state == BT_CLOSED)) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c603a5eb4720..8bbbb5ec468c 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -918,7 +918,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how) sk->sk_shutdown = SHUTDOWN_MASK; __rfcomm_sock_close(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } release_sock(sk); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 403ec09f480a..8e385a0ae60e 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -70,7 +70,7 @@ struct rfcomm_dev { }; static LIST_HEAD(rfcomm_dev_list); -static DEFINE_SPINLOCK(rfcomm_dev_lock); +static DEFINE_MUTEX(rfcomm_dev_lock); static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb); static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err); @@ -96,9 +96,9 @@ static void rfcomm_dev_destruct(struct tty_port *port) if (dev->tty_dev) tty_unregister_device(rfcomm_tty_driver, dev->id); - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); list_del(&dev->list); - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); kfree(dev); @@ -161,14 +161,14 @@ static struct rfcomm_dev *rfcomm_dev_get(int id) { struct rfcomm_dev *dev; - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); dev = __rfcomm_dev_lookup(id); if (dev && !tty_port_get(&dev->port)) dev = NULL; - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); return dev; } @@ -224,7 +224,7 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req, if (!dev) return ERR_PTR(-ENOMEM); - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); if (req->dev_id < 0) { dev->id = 0; @@ -305,11 +305,11 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req, holds reference to this module. */ __module_get(THIS_MODULE); - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); return dev; out: - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); kfree(dev); return ERR_PTR(err); } @@ -524,7 +524,7 @@ static int rfcomm_get_dev_list(void __user *arg) di = dl->dev_info; - spin_lock(&rfcomm_dev_lock); + mutex_lock(&rfcomm_dev_lock); list_for_each_entry(dev, &rfcomm_dev_list, list) { if (!tty_port_get(&dev->port)) @@ -540,7 +540,7 @@ static int rfcomm_get_dev_list(void __user *arg) break; } - spin_unlock(&rfcomm_dev_lock); + mutex_unlock(&rfcomm_dev_lock); dl->dev_num = n; size = sizeof(*dl) + n * sizeof(*di); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index c06dbd3938e8..7ee9e4ab00f8 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -40,13 +40,38 @@ static struct bt_sock_list sco_sk_list = { .lock = __RW_LOCK_UNLOCKED(sco_sk_list.lock) }; -static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent); -static void sco_chan_del(struct sock *sk, int err); +/* ---- SCO connections ---- */ +struct sco_conn { + struct hci_conn *hcon; + + spinlock_t lock; + struct sock *sk; + + unsigned int mtu; +}; + +#define sco_conn_lock(c) spin_lock(&c->lock); +#define sco_conn_unlock(c) spin_unlock(&c->lock); static void sco_sock_close(struct sock *sk); static void sco_sock_kill(struct sock *sk); +/* ----- SCO socket info ----- */ +#define sco_pi(sk) ((struct sco_pinfo *) sk) + +struct sco_pinfo { + struct bt_sock bt; + bdaddr_t src; + bdaddr_t dst; + __u32 flags; + __u16 setting; + struct sco_conn *conn; +}; + /* ---- SCO timers ---- */ +#define SCO_CONN_TIMEOUT (HZ * 40) +#define SCO_DISCONN_TIMEOUT (HZ * 2) + static void sco_sock_timeout(unsigned long arg) { struct sock *sk = (struct sock *) arg; @@ -102,13 +127,31 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) return conn; } -static struct sock *sco_chan_get(struct sco_conn *conn) +/* Delete channel. + * Must be called on the locked socket. */ +static void sco_chan_del(struct sock *sk, int err) { - struct sock *sk = NULL; - sco_conn_lock(conn); - sk = conn->sk; - sco_conn_unlock(conn); - return sk; + struct sco_conn *conn; + + conn = sco_pi(sk)->conn; + + BT_DBG("sk %p, conn %p, err %d", sk, conn, err); + + if (conn) { + sco_conn_lock(conn); + conn->sk = NULL; + sco_pi(sk)->conn = NULL; + sco_conn_unlock(conn); + + if (conn->hcon) + hci_conn_drop(conn->hcon); + } + + sk->sk_state = BT_CLOSED; + sk->sk_err = err; + sk->sk_state_change(sk); + + sock_set_flag(sk, SOCK_ZAPPED); } static int sco_conn_del(struct hci_conn *hcon, int err) @@ -122,7 +165,10 @@ static int sco_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); /* Kill socket */ - sk = sco_chan_get(conn); + sco_conn_lock(conn); + sk = conn->sk; + sco_conn_unlock(conn); + if (sk) { bh_lock_sock(sk); sco_sock_clear_timer(sk); @@ -136,6 +182,17 @@ static int sco_conn_del(struct hci_conn *hcon, int err) return 0; } +static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent) +{ + BT_DBG("conn %p", conn); + + sco_pi(sk)->conn = conn; + conn->sk = sk; + + if (parent) + bt_accept_enqueue(parent, sk); +} + static int sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent) { @@ -240,7 +297,11 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) { - struct sock *sk = sco_chan_get(conn); + struct sock *sk; + + sco_conn_lock(conn); + sk = conn->sk; + sco_conn_unlock(conn); if (!sk) goto drop; @@ -909,7 +970,8 @@ static int sco_sock_shutdown(struct socket *sock, int how) sco_sock_clear_timer(sk); __sco_sock_close(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } @@ -929,7 +991,8 @@ static int sco_sock_release(struct socket *sock) sco_sock_close(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) { + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) { lock_sock(sk); err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); release_sock(sk); @@ -940,44 +1003,6 @@ static int sco_sock_release(struct socket *sock) return err; } -static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent) -{ - BT_DBG("conn %p", conn); - - sco_pi(sk)->conn = conn; - conn->sk = sk; - - if (parent) - bt_accept_enqueue(parent, sk); -} - -/* Delete channel. - * Must be called on the locked socket. */ -static void sco_chan_del(struct sock *sk, int err) -{ - struct sco_conn *conn; - - conn = sco_pi(sk)->conn; - - BT_DBG("sk %p, conn %p, err %d", sk, conn, err); - - if (conn) { - sco_conn_lock(conn); - conn->sk = NULL; - sco_pi(sk)->conn = NULL; - sco_conn_unlock(conn); - - if (conn->hcon) - hci_conn_drop(conn->hcon); - } - - sk->sk_state = BT_CLOSED; - sk->sk_err = err; - sk->sk_state_change(sk); - - sock_set_flag(sk, SOCK_ZAPPED); -} - static void sco_conn_ready(struct sco_conn *conn) { struct sock *parent; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index dfb4e1161c10..fd3294300803 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -35,18 +35,43 @@ #define AUTH_REQ_MASK 0x07 -static inline void swap128(const u8 src[16], u8 dst[16]) -{ - int i; - for (i = 0; i < 16; i++) - dst[15 - i] = src[i]; -} +enum { + SMP_FLAG_TK_VALID, + SMP_FLAG_CFM_PENDING, + SMP_FLAG_MITM_AUTH, + SMP_FLAG_COMPLETE, + SMP_FLAG_INITIATOR, +}; + +struct smp_chan { + struct l2cap_conn *conn; + u8 preq[7]; /* SMP Pairing Request */ + u8 prsp[7]; /* SMP Pairing Response */ + u8 prnd[16]; /* SMP Pairing Random (local) */ + u8 rrnd[16]; /* SMP Pairing Random (remote) */ + u8 pcnf[16]; /* SMP Pairing Confirm */ + u8 tk[16]; /* SMP Temporary Key */ + u8 enc_key_size; + u8 remote_key_dist; + bdaddr_t id_addr; + u8 id_addr_type; + u8 irk[16]; + struct smp_csrk *csrk; + struct smp_csrk *slave_csrk; + struct smp_ltk *ltk; + struct smp_ltk *slave_ltk; + struct smp_irk *remote_irk; + unsigned long flags; + + struct crypto_blkcipher *tfm_aes; +}; -static inline void swap56(const u8 src[7], u8 dst[7]) +static inline void swap_buf(const u8 *src, u8 *dst, size_t len) { - int i; - for (i = 0; i < 7; i++) - dst[6 - i] = src[i]; + size_t i; + + for (i = 0; i < len; i++) + dst[len - 1 - i] = src[i]; } static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) @@ -65,7 +90,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) desc.flags = 0; /* The most significant octet of key corresponds to k[0] */ - swap128(k, tmp); + swap_buf(k, tmp, 16); err = crypto_blkcipher_setkey(tfm, tmp, 16); if (err) { @@ -74,7 +99,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) } /* Most significant octet of plaintextData corresponds to data[0] */ - swap128(r, data); + swap_buf(r, data, 16); sg_init_one(&sg, data, 16); @@ -83,7 +108,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) BT_ERR("Encrypt data error %d", err); /* Most significant octet of encryptedData corresponds to data[0] */ - swap128(data, r); + swap_buf(data, r, 16); return err; } @@ -147,13 +172,16 @@ int smp_generate_rpa(struct crypto_blkcipher *tfm, u8 irk[16], bdaddr_t *rpa) return 0; } -static int smp_c1(struct crypto_blkcipher *tfm, u8 k[16], u8 r[16], - u8 preq[7], u8 pres[7], u8 _iat, bdaddr_t *ia, - u8 _rat, bdaddr_t *ra, u8 res[16]) +static int smp_c1(struct smp_chan *smp, u8 k[16], u8 r[16], u8 preq[7], + u8 pres[7], u8 _iat, bdaddr_t *ia, u8 _rat, bdaddr_t *ra, + u8 res[16]) { + struct hci_dev *hdev = smp->conn->hcon->hdev; u8 p1[16], p2[16]; int err; + BT_DBG("%s", hdev->name); + memset(p1, 0, 16); /* p1 = pres || preq || _rat || _iat */ @@ -171,7 +199,7 @@ static int smp_c1(struct crypto_blkcipher *tfm, u8 k[16], u8 r[16], u128_xor((u128 *) res, (u128 *) r, (u128 *) p1); /* res = e(k, res) */ - err = smp_e(tfm, k, res); + err = smp_e(smp->tfm_aes, k, res); if (err) { BT_ERR("Encrypt data error"); return err; @@ -181,23 +209,26 @@ static int smp_c1(struct crypto_blkcipher *tfm, u8 k[16], u8 r[16], u128_xor((u128 *) res, (u128 *) res, (u128 *) p2); /* res = e(k, res) */ - err = smp_e(tfm, k, res); + err = smp_e(smp->tfm_aes, k, res); if (err) BT_ERR("Encrypt data error"); return err; } -static int smp_s1(struct crypto_blkcipher *tfm, u8 k[16], u8 r1[16], - u8 r2[16], u8 _r[16]) +static int smp_s1(struct smp_chan *smp, u8 k[16], u8 r1[16], u8 r2[16], + u8 _r[16]) { + struct hci_dev *hdev = smp->conn->hcon->hdev; int err; + BT_DBG("%s", hdev->name); + /* Just least significant octets from r1 and r2 are considered */ memcpy(_r, r2, 8); memcpy(_r + 8, r1, 8); - err = smp_e(tfm, k, _r); + err = smp_e(smp->tfm_aes, k, _r); if (err) BT_ERR("Encrypt data error"); @@ -276,7 +307,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, struct hci_dev *hdev = hcon->hdev; u8 local_dist = 0, remote_dist = 0; - if (test_bit(HCI_PAIRABLE, &conn->hcon->hdev->dev_flags)) { + if (test_bit(HCI_BONDABLE, &conn->hcon->hdev->dev_flags)) { local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN; authreq |= SMP_AUTH_BONDING; @@ -358,6 +389,18 @@ static const u8 gen_method[5][5] = { { CFM_PASSKEY, CFM_PASSKEY, REQ_PASSKEY, JUST_WORKS, OVERLAP }, }; +static u8 get_auth_method(struct smp_chan *smp, u8 local_io, u8 remote_io) +{ + /* If either side has unknown io_caps, use JUST_CFM (which gets + * converted later to JUST_WORKS if we're initiators. + */ + if (local_io > SMP_IO_KEYBOARD_DISPLAY || + remote_io > SMP_IO_KEYBOARD_DISPLAY) + return JUST_CFM; + + return gen_method[remote_io][local_io]; +} + static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, u8 local_io, u8 remote_io) { @@ -369,44 +412,44 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, /* Initialize key for JUST WORKS */ memset(smp->tk, 0, sizeof(smp->tk)); - clear_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + clear_bit(SMP_FLAG_TK_VALID, &smp->flags); BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io); - /* If neither side wants MITM, use JUST WORKS */ - /* If either side has unknown io_caps, use JUST WORKS */ - /* Otherwise, look up method from the table */ - if (!(auth & SMP_AUTH_MITM) || - local_io > SMP_IO_KEYBOARD_DISPLAY || - remote_io > SMP_IO_KEYBOARD_DISPLAY) - method = JUST_WORKS; + /* If neither side wants MITM, either "just" confirm an incoming + * request or use just-works for outgoing ones. The JUST_CFM + * will be converted to JUST_WORKS if necessary later in this + * function. If either side has MITM look up the method from the + * table. + */ + if (!(auth & SMP_AUTH_MITM)) + method = JUST_CFM; else - method = gen_method[remote_io][local_io]; + method = get_auth_method(smp, local_io, remote_io); - /* If not bonding, don't ask user to confirm a Zero TK */ - if (!(auth & SMP_AUTH_BONDING) && method == JUST_CFM) + /* Don't confirm locally initiated pairing attempts */ + if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR, &smp->flags)) method = JUST_WORKS; - /* Don't confirm locally initiated pairing attempts */ - if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR, - &smp->smp_flags)) + /* Don't bother user space with no IO capabilities */ + if (method == JUST_CFM && hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT) method = JUST_WORKS; /* If Just Works, Continue with Zero TK */ if (method == JUST_WORKS) { - set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + set_bit(SMP_FLAG_TK_VALID, &smp->flags); return 0; } /* Not Just Works/Confirm results in MITM Authentication */ if (method != JUST_CFM) - set_bit(SMP_FLAG_MITM_AUTH, &smp->smp_flags); + set_bit(SMP_FLAG_MITM_AUTH, &smp->flags); /* If both devices have Keyoard-Display I/O, the master * Confirms and the slave Enters the passkey. */ if (method == OVERLAP) { - if (hcon->link_mode & HCI_LM_MASTER) + if (hcon->role == HCI_ROLE_MASTER) method = CFM_PASSKEY; else method = REQ_PASSKEY; @@ -419,7 +462,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, passkey %= 1000000; put_unaligned_le32(passkey, smp->tk); BT_DBG("PassKey: %d", passkey); - set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + set_bit(SMP_FLAG_TK_VALID, &smp->flags); } hci_dev_lock(hcon->hdev); @@ -441,78 +484,49 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, return ret; } -static void confirm_work(struct work_struct *work) +static u8 smp_confirm(struct smp_chan *smp) { - struct smp_chan *smp = container_of(work, struct smp_chan, confirm); struct l2cap_conn *conn = smp->conn; - struct hci_dev *hdev = conn->hcon->hdev; - struct crypto_blkcipher *tfm = hdev->tfm_aes; struct smp_cmd_pairing_confirm cp; int ret; - u8 reason; BT_DBG("conn %p", conn); - /* Prevent mutual access to hdev->tfm_aes */ - hci_dev_lock(hdev); - - ret = smp_c1(tfm, smp->tk, smp->prnd, smp->preq, smp->prsp, + ret = smp_c1(smp, smp->tk, smp->prnd, smp->preq, smp->prsp, conn->hcon->init_addr_type, &conn->hcon->init_addr, conn->hcon->resp_addr_type, &conn->hcon->resp_addr, cp.confirm_val); + if (ret) + return SMP_UNSPECIFIED; - hci_dev_unlock(hdev); - - if (ret) { - reason = SMP_UNSPECIFIED; - goto error; - } - - clear_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); + clear_bit(SMP_FLAG_CFM_PENDING, &smp->flags); smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); - return; - -error: - smp_failure(conn, reason); + return 0; } -static void random_work(struct work_struct *work) +static u8 smp_random(struct smp_chan *smp) { - struct smp_chan *smp = container_of(work, struct smp_chan, random); struct l2cap_conn *conn = smp->conn; struct hci_conn *hcon = conn->hcon; - struct hci_dev *hdev = hcon->hdev; - struct crypto_blkcipher *tfm = hdev->tfm_aes; - u8 reason, confirm[16]; + u8 confirm[16]; int ret; - if (IS_ERR_OR_NULL(tfm)) { - reason = SMP_UNSPECIFIED; - goto error; - } + if (IS_ERR_OR_NULL(smp->tfm_aes)) + return SMP_UNSPECIFIED; BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave"); - /* Prevent mutual access to hdev->tfm_aes */ - hci_dev_lock(hdev); - - ret = smp_c1(tfm, smp->tk, smp->rrnd, smp->preq, smp->prsp, + ret = smp_c1(smp, smp->tk, smp->rrnd, smp->preq, smp->prsp, hcon->init_addr_type, &hcon->init_addr, hcon->resp_addr_type, &hcon->resp_addr, confirm); - - hci_dev_unlock(hdev); - - if (ret) { - reason = SMP_UNSPECIFIED; - goto error; - } + if (ret) + return SMP_UNSPECIFIED; if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) { BT_ERR("Pairing failed (confirmation values mismatch)"); - reason = SMP_CONFIRM_FAILED; - goto error; + return SMP_CONFIRM_FAILED; } if (hcon->out) { @@ -520,40 +534,44 @@ static void random_work(struct work_struct *work) __le64 rand = 0; __le16 ediv = 0; - smp_s1(tfm, smp->tk, smp->rrnd, smp->prnd, stk); + smp_s1(smp, smp->tk, smp->rrnd, smp->prnd, stk); memset(stk + smp->enc_key_size, 0, SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size); - if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) { - reason = SMP_UNSPECIFIED; - goto error; - } + if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) + return SMP_UNSPECIFIED; hci_le_start_enc(hcon, ediv, rand, stk); hcon->enc_key_size = smp->enc_key_size; + set_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags); } else { - u8 stk[16]; + u8 stk[16], auth; __le64 rand = 0; __le16 ediv = 0; smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); - smp_s1(tfm, smp->tk, smp->prnd, smp->rrnd, stk); + smp_s1(smp, smp->tk, smp->prnd, smp->rrnd, stk); memset(stk + smp->enc_key_size, 0, SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size); + if (hcon->pending_sec_level == BT_SECURITY_HIGH) + auth = 1; + else + auth = 0; + + /* Even though there's no _SLAVE suffix this is the + * slave STK we're adding for later lookup (the master + * STK never needs to be stored). + */ hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type, - HCI_SMP_STK_SLAVE, 0, stk, smp->enc_key_size, - ediv, rand); + SMP_STK, auth, stk, smp->enc_key_size, ediv, rand); } - return; - -error: - smp_failure(conn, reason); + return 0; } static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) @@ -561,15 +579,21 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) struct smp_chan *smp; smp = kzalloc(sizeof(*smp), GFP_ATOMIC); - if (!smp) + if (!smp) { + clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags); return NULL; + } - INIT_WORK(&smp->confirm, confirm_work); - INIT_WORK(&smp->random, random_work); + smp->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(smp->tfm_aes)) { + BT_ERR("Unable to create ECB crypto context"); + kfree(smp); + clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags); + return NULL; + } smp->conn = conn; conn->smp_chan = smp; - conn->hcon->smp_conn = conn; hci_conn_hold(conn->hcon); @@ -583,12 +607,14 @@ void smp_chan_destroy(struct l2cap_conn *conn) BUG_ON(!smp); - complete = test_bit(SMP_FLAG_COMPLETE, &smp->smp_flags); + complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags); mgmt_smp_complete(conn->hcon, complete); kfree(smp->csrk); kfree(smp->slave_csrk); + crypto_free_blkcipher(smp->tfm_aes); + /* If pairing failed clean up any keys we might have */ if (!complete) { if (smp->ltk) { @@ -609,19 +635,18 @@ void smp_chan_destroy(struct l2cap_conn *conn) kfree(smp); conn->smp_chan = NULL; - conn->hcon->smp_conn = NULL; hci_conn_drop(conn->hcon); } int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) { - struct l2cap_conn *conn = hcon->smp_conn; + struct l2cap_conn *conn = hcon->l2cap_data; struct smp_chan *smp; u32 value; BT_DBG(""); - if (!conn) + if (!conn || !test_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags)) return -ENOTCONN; smp = conn->smp_chan; @@ -634,7 +659,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) put_unaligned_le32(value, smp->tk); /* Fall Through */ case MGMT_OP_USER_CONFIRM_REPLY: - set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + set_bit(SMP_FLAG_TK_VALID, &smp->flags); break; case MGMT_OP_USER_PASSKEY_NEG_REPLY: case MGMT_OP_USER_CONFIRM_NEG_REPLY: @@ -646,8 +671,11 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) } /* If it is our turn to send Pairing Confirm, do so now */ - if (test_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags)) - queue_work(hcon->hdev->workqueue, &smp->confirm); + if (test_bit(SMP_FLAG_CFM_PENDING, &smp->flags)) { + u8 rsp = smp_confirm(smp); + if (rsp) + smp_failure(conn, rsp); + } return 0; } @@ -655,17 +683,17 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_pairing rsp, *req = (void *) skb->data; + struct hci_dev *hdev = conn->hcon->hdev; struct smp_chan *smp; - u8 key_size; - u8 auth = SMP_AUTH_NONE; + u8 key_size, auth, sec_level; int ret; BT_DBG("conn %p", conn); if (skb->len < sizeof(*req)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; - if (conn->hcon->link_mode & HCI_LM_MASTER) + if (conn->hcon->role != HCI_ROLE_SLAVE) return SMP_CMD_NOTSUPP; if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) @@ -676,15 +704,30 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (!smp) return SMP_UNSPECIFIED; + if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) && + (req->auth_req & SMP_AUTH_BONDING)) + return SMP_PAIRING_NOTSUPP; + smp->preq[0] = SMP_CMD_PAIRING_REQ; memcpy(&smp->preq[1], req, sizeof(*req)); skb_pull(skb, sizeof(*req)); /* We didn't start the pairing, so match remote */ - if (req->auth_req & SMP_AUTH_BONDING) - auth = req->auth_req; + auth = req->auth_req; + + sec_level = authreq_to_seclevel(auth); + if (sec_level > conn->hcon->pending_sec_level) + conn->hcon->pending_sec_level = sec_level; - conn->hcon->pending_sec_level = authreq_to_seclevel(auth); + /* If we need MITM check that it can be acheived */ + if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) { + u8 method; + + method = get_auth_method(smp, conn->hcon->io_capability, + req->io_capability); + if (method == JUST_WORKS || method == JUST_CFM) + return SMP_AUTH_REQUIREMENTS; + } build_pairing_cmd(conn, req, &rsp, auth); @@ -704,8 +747,6 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (ret) return SMP_UNSPECIFIED; - clear_bit(SMP_FLAG_INITIATOR, &smp->smp_flags); - return 0; } @@ -713,16 +754,15 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_pairing *req, *rsp = (void *) skb->data; struct smp_chan *smp = conn->smp_chan; - struct hci_dev *hdev = conn->hcon->hdev; u8 key_size, auth = SMP_AUTH_NONE; int ret; BT_DBG("conn %p", conn); if (skb->len < sizeof(*rsp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; - if (!(conn->hcon->link_mode & HCI_LM_MASTER)) + if (conn->hcon->role != HCI_ROLE_MASTER) return SMP_CMD_NOTSUPP; skb_pull(skb, sizeof(*rsp)); @@ -733,6 +773,16 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (check_enc_key_size(conn, key_size)) return SMP_ENC_KEY_SIZE; + /* If we need MITM check that it can be acheived */ + if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) { + u8 method; + + method = get_auth_method(smp, req->io_capability, + rsp->io_capability); + if (method == JUST_WORKS || method == JUST_CFM) + return SMP_AUTH_REQUIREMENTS; + } + get_random_bytes(smp->prnd, sizeof(smp->prnd)); smp->prsp[0] = SMP_CMD_PAIRING_RSP; @@ -753,11 +803,11 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (ret) return SMP_UNSPECIFIED; - set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); + set_bit(SMP_FLAG_CFM_PENDING, &smp->flags); /* Can't compose response until we have been confirmed */ - if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags)) - queue_work(hdev->workqueue, &smp->confirm); + if (test_bit(SMP_FLAG_TK_VALID, &smp->flags)) + return smp_confirm(smp); return 0; } @@ -765,12 +815,11 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_chan *smp = conn->smp_chan; - struct hci_dev *hdev = conn->hcon->hdev; BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave"); if (skb->len < sizeof(smp->pcnf)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf)); skb_pull(skb, sizeof(smp->pcnf)); @@ -778,10 +827,10 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) if (conn->hcon->out) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); - else if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags)) - queue_work(hdev->workqueue, &smp->confirm); + else if (test_bit(SMP_FLAG_TK_VALID, &smp->flags)) + return smp_confirm(smp); else - set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); + set_bit(SMP_FLAG_CFM_PENDING, &smp->flags); return 0; } @@ -789,41 +838,63 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_chan *smp = conn->smp_chan; - struct hci_dev *hdev = conn->hcon->hdev; BT_DBG("conn %p", conn); if (skb->len < sizeof(smp->rrnd)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; memcpy(smp->rrnd, skb->data, sizeof(smp->rrnd)); skb_pull(skb, sizeof(smp->rrnd)); - queue_work(hdev->workqueue, &smp->random); - - return 0; + return smp_random(smp); } -static u8 smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level) +static bool smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level) { struct smp_ltk *key; struct hci_conn *hcon = conn->hcon; key = hci_find_ltk_by_addr(hcon->hdev, &hcon->dst, hcon->dst_type, - hcon->out); + hcon->role); if (!key) - return 0; + return false; if (sec_level > BT_SECURITY_MEDIUM && !key->authenticated) - return 0; + return false; if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) - return 1; + return true; hci_le_start_enc(hcon, key->ediv, key->rand, key->val); hcon->enc_key_size = key->enc_size; - return 1; + /* We never store STKs for master role, so clear this flag */ + clear_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags); + + return true; +} + +bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level) +{ + if (sec_level == BT_SECURITY_LOW) + return true; + + /* If we're encrypted with an STK always claim insufficient + * security. This way we allow the connection to be re-encrypted + * with an LTK, even if the LTK provides the same level of + * security. Only exception is if we don't have an LTK (e.g. + * because of key distribution bits). + */ + if (test_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags) && + hci_find_ltk_by_addr(hcon->hdev, &hcon->dst, hcon->dst_type, + hcon->role)) + return false; + + if (hcon->sec_level >= sec_level) + return true; + + return false; } static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) @@ -832,16 +903,22 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_cmd_pairing cp; struct hci_conn *hcon = conn->hcon; struct smp_chan *smp; + u8 sec_level; BT_DBG("conn %p", conn); if (skb->len < sizeof(*rp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; - if (!(conn->hcon->link_mode & HCI_LM_MASTER)) + if (hcon->role != HCI_ROLE_MASTER) return SMP_CMD_NOTSUPP; - hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req); + sec_level = authreq_to_seclevel(rp->auth_req); + if (smp_sufficient_security(hcon, sec_level)) + return 0; + + if (sec_level > hcon->pending_sec_level) + hcon->pending_sec_level = sec_level; if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) return 0; @@ -850,6 +927,12 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) return 0; smp = smp_chan_create(conn); + if (!smp) + return SMP_UNSPECIFIED; + + if (!test_bit(HCI_BONDABLE, &hcon->hdev->dev_flags) && + (rp->auth_req & SMP_AUTH_BONDING)) + return SMP_PAIRING_NOTSUPP; skb_pull(skb, sizeof(*rp)); @@ -861,22 +944,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - clear_bit(SMP_FLAG_INITIATOR, &smp->smp_flags); - return 0; } -bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level) -{ - if (sec_level == BT_SECURITY_LOW) - return true; - - if (hcon->sec_level >= sec_level) - return true; - - return false; -} - int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -895,9 +965,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (smp_sufficient_security(hcon, sec_level)) return 1; - if (hcon->link_mode & HCI_LM_MASTER) - if (smp_ltk_encrypt(conn, sec_level)) - goto done; + if (sec_level > hcon->pending_sec_level) + hcon->pending_sec_level = sec_level; + + if (hcon->role == HCI_ROLE_MASTER) + if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) + return 0; if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags)) return 0; @@ -908,13 +981,14 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) authreq = seclevel_to_authreq(sec_level); - /* hcon->auth_type is set by pair_device in mgmt.c. If the MITM - * flag is set we should also set it for the SMP request. + /* Require MITM if IO Capability allows or the security level + * requires it. */ - if ((hcon->auth_type & 0x01)) + if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT || + hcon->pending_sec_level > BT_SECURITY_MEDIUM) authreq |= SMP_AUTH_MITM; - if (hcon->link_mode & HCI_LM_MASTER) { + if (hcon->role == HCI_ROLE_MASTER) { struct smp_cmd_pairing cp; build_pairing_cmd(conn, &cp, NULL, authreq); @@ -928,10 +1002,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); } - set_bit(SMP_FLAG_INITIATOR, &smp->smp_flags); - -done: - hcon->pending_sec_level = sec_level; + set_bit(SMP_FLAG_INITIATOR, &smp->flags); return 0; } @@ -944,7 +1015,7 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); if (skb->len < sizeof(*rp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY)) @@ -969,7 +1040,7 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); if (skb->len < sizeof(*rp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY)) @@ -982,7 +1053,7 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb) hci_dev_lock(hdev); authenticated = (hcon->sec_level == BT_SECURITY_HIGH); - ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type, HCI_SMP_LTK, + ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type, SMP_LTK, authenticated, smp->tk, smp->enc_key_size, rp->ediv, rp->rand); smp->ltk = ltk; @@ -1001,7 +1072,7 @@ static int smp_cmd_ident_info(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG(""); if (skb->len < sizeof(*info)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_ID_KEY)) @@ -1025,7 +1096,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, BT_DBG(""); if (skb->len < sizeof(*info)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_ID_KEY)) @@ -1036,6 +1107,8 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, skb_pull(skb, sizeof(*info)); + hci_dev_lock(hcon->hdev); + /* Strictly speaking the Core Specification (4.1) allows sending * an empty address which would force us to rely on just the IRK * as "identity information". However, since such @@ -1045,8 +1118,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, */ if (!bacmp(&info->bdaddr, BDADDR_ANY)) { BT_ERR("Ignoring IRK with no identity address"); - smp_distribute_keys(conn); - return 0; + goto distribute; } bacpy(&smp->id_addr, &info->bdaddr); @@ -1060,8 +1132,11 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, smp->remote_irk = hci_add_irk(conn->hcon->hdev, &smp->id_addr, smp->id_addr_type, smp->irk, &rpa); +distribute: smp_distribute_keys(conn); + hci_dev_unlock(hcon->hdev); + return 0; } @@ -1075,7 +1150,7 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); if (skb->len < sizeof(*rp)) - return SMP_UNSPECIFIED; + return SMP_INVALID_PARAMS; /* Ignore this PDU if it wasn't requested */ if (!(smp->remote_key_dist & SMP_DIST_SIGN)) @@ -1117,7 +1192,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) } if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) { - err = -ENOTSUPP; + err = -EOPNOTSUPP; reason = SMP_PAIRING_NOTSUPP; goto done; } @@ -1135,7 +1210,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) !conn->smp_chan) { BT_ERR("Unexpected SMP command 0x%02x. Disconnecting.", code); kfree_skb(skb); - return -ENOTSUPP; + return -EOPNOTSUPP; } switch (code) { @@ -1219,6 +1294,22 @@ static void smp_notify_keys(struct l2cap_conn *conn) bacpy(&hcon->dst, &smp->remote_irk->bdaddr); hcon->dst_type = smp->remote_irk->addr_type; l2cap_conn_update_id_addr(hcon); + + /* When receiving an indentity resolving key for + * a remote device that does not use a resolvable + * private address, just remove the key so that + * it is possible to use the controller white + * list for scanning. + * + * Userspace will have been told to not store + * this key at this point. So it is safe to + * just remove it. + */ + if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) { + list_del(&smp->remote_irk->list); + kfree(smp->remote_irk); + smp->remote_irk = NULL; + } } /* The LTKs and CSRKs should be persistent only if both sides @@ -1298,7 +1389,7 @@ int smp_distribute_keys(struct l2cap_conn *conn) authenticated = hcon->sec_level == BT_SECURITY_HIGH; ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type, - HCI_SMP_LTK_SLAVE, authenticated, enc.ltk, + SMP_LTK_SLAVE, authenticated, enc.ltk, smp->enc_key_size, ediv, rand); smp->slave_ltk = ltk; @@ -1358,7 +1449,7 @@ int smp_distribute_keys(struct l2cap_conn *conn) clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags); cancel_delayed_work_sync(&conn->security_timer); - set_bit(SMP_FLAG_COMPLETE, &smp->smp_flags); + set_bit(SMP_FLAG_COMPLETE, &smp->flags); smp_notify_keys(conn); smp_chan_destroy(conn); diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 1277147a9150..796f4f45f92f 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -111,37 +111,16 @@ struct smp_cmd_security_req { #define SMP_CMD_NOTSUPP 0x07 #define SMP_UNSPECIFIED 0x08 #define SMP_REPEATED_ATTEMPTS 0x09 +#define SMP_INVALID_PARAMS 0x0a #define SMP_MIN_ENC_KEY_SIZE 7 #define SMP_MAX_ENC_KEY_SIZE 16 -#define SMP_FLAG_TK_VALID 1 -#define SMP_FLAG_CFM_PENDING 2 -#define SMP_FLAG_MITM_AUTH 3 -#define SMP_FLAG_COMPLETE 4 -#define SMP_FLAG_INITIATOR 5 - -struct smp_chan { - struct l2cap_conn *conn; - u8 preq[7]; /* SMP Pairing Request */ - u8 prsp[7]; /* SMP Pairing Response */ - u8 prnd[16]; /* SMP Pairing Random (local) */ - u8 rrnd[16]; /* SMP Pairing Random (remote) */ - u8 pcnf[16]; /* SMP Pairing Confirm */ - u8 tk[16]; /* SMP Temporary Key */ - u8 enc_key_size; - u8 remote_key_dist; - bdaddr_t id_addr; - u8 id_addr_type; - u8 irk[16]; - struct smp_csrk *csrk; - struct smp_csrk *slave_csrk; - struct smp_ltk *ltk; - struct smp_ltk *slave_ltk; - struct smp_irk *remote_irk; - unsigned long smp_flags; - struct work_struct confirm; - struct work_struct random; +/* LTK types used in internal storage (struct smp_ltk) */ +enum { + SMP_STK, + SMP_LTK, + SMP_LTK_SLAVE, }; /* SMP Commands */ diff --git a/net/bridge/Makefile b/net/bridge/Makefile index e85498b2f166..8590b942bffa 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ - br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \ + br_ioctl.o br_stp.o br_stp_bpdu.o \ br_stp_if.o br_stp_timer.o br_netlink.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o @@ -16,4 +16,4 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o -obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ +obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br.c b/net/bridge/br.c index 19311aafcf5a..1a755a1e5410 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -22,6 +22,104 @@ #include "br_private.h" +/* + * Handle changes in state of network devices enslaved to a bridge. + * + * Note: don't care about up/down if bridge itself is down, because + * port state is checked when bridge is brought up. + */ +static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + bool changed_addr; + int err; + + /* register of bridge completed, add sysfs entries */ + if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { + br_sysfs_addbr(dev); + return NOTIFY_DONE; + } + + /* not a port of a bridge */ + p = br_port_get_rtnl(dev); + if (!p) + return NOTIFY_DONE; + + br = p->br; + + switch (event) { + case NETDEV_CHANGEMTU: + dev_set_mtu(br->dev, br_min_mtu(br)); + break; + + case NETDEV_CHANGEADDR: + spin_lock_bh(&br->lock); + br_fdb_changeaddr(p, dev->dev_addr); + changed_addr = br_stp_recalculate_bridge_id(br); + spin_unlock_bh(&br->lock); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + + break; + + case NETDEV_CHANGE: + br_port_carrier_check(p); + break; + + case NETDEV_FEAT_CHANGE: + netdev_update_features(br->dev); + break; + + case NETDEV_DOWN: + spin_lock_bh(&br->lock); + if (br->dev->flags & IFF_UP) + br_stp_disable_port(p); + spin_unlock_bh(&br->lock); + break; + + case NETDEV_UP: + if (netif_running(br->dev) && netif_oper_up(dev)) { + spin_lock_bh(&br->lock); + br_stp_enable_port(p); + spin_unlock_bh(&br->lock); + } + break; + + case NETDEV_UNREGISTER: + br_del_if(br, dev); + break; + + case NETDEV_CHANGENAME: + err = br_sysfs_renameif(p); + if (err) + return notifier_from_errno(err); + break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlaying device to change its type. */ + return NOTIFY_BAD; + + case NETDEV_RESEND_IGMP: + /* Propagate to master device */ + call_netdevice_notifiers(event, br->dev); + break; + } + + /* Events that may cause spanning tree to refresh */ + if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || + event == NETDEV_CHANGE || event == NETDEV_DOWN) + br_ifinfo_notify(RTM_NEWLINK, p); + + return NOTIFY_DONE; +} + +static struct notifier_block br_device_notifier = { + .notifier_call = br_device_event +}; + static void __net_exit br_net_exit(struct net *net) { struct net_device *dev; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 3e2da2cb72db..568cccd39a3d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -112,6 +112,12 @@ static void br_dev_set_multicast_list(struct net_device *dev) { } +static void br_dev_change_rx_flags(struct net_device *dev, int change) +{ + if (change & IFF_PROMISC) + br_manage_promisc(netdev_priv(dev)); +} + static int br_dev_stop(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -309,6 +315,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, .ndo_set_rx_mode = br_dev_set_multicast_list, + .ndo_change_rx_flags = br_dev_change_rx_flags, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -348,14 +355,15 @@ void br_dev_setup(struct net_device *dev) dev->netdev_ops = &br_netdev_ops; dev->destructor = br_dev_free; - SET_ETHTOOL_OPS(dev, &br_ethtool_ops); + dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | - NETIF_F_HW_VLAN_CTAG_TX; - dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; dev->vlan_features = COMMON_FEATURES; br->dev = dev; @@ -370,6 +378,7 @@ void br_dev_setup(struct net_device *dev) br->stp_enabled = BR_NO_STP; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; + br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; br->designated_root = br->bridge_id; br->bridge_max_age = br->max_age = 20 * HZ; @@ -380,4 +389,5 @@ void br_dev_setup(struct net_device *dev) br_netfilter_rtable_init(br); br_stp_timer_init(br); br_multicast_init(br); + br_vlan_init(br); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9203d5a1943f..6f6c95cfe8f2 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -85,8 +85,56 @@ static void fdb_rcu_free(struct rcu_head *head) kmem_cache_free(br_fdb_cache, ent); } +/* When a static FDB entry is added, the mac address from the entry is + * added to the bridge private HW address list and all required ports + * are then updated with the new information. + * Called under RTNL. + */ +static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr) +{ + int err; + struct net_bridge_port *p; + + ASSERT_RTNL(); + + list_for_each_entry(p, &br->port_list, list) { + if (!br_promisc_port(p)) { + err = dev_uc_add(p->dev, addr); + if (err) + goto undo; + } + } + + return; +undo: + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + if (!br_promisc_port(p)) + dev_uc_del(p->dev, addr); + } +} + +/* When a static FDB entry is deleted, the HW address from that entry is + * also removed from the bridge private HW address list and updates all + * the ports with needed information. + * Called under RTNL. + */ +static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr) +{ + struct net_bridge_port *p; + + ASSERT_RTNL(); + + list_for_each_entry(p, &br->port_list, list) { + if (!br_promisc_port(p)) + dev_uc_del(p->dev, addr); + } +} + static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) { + if (f->is_static) + fdb_del_hw(br, f->addr.addr); + hlist_del_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); @@ -466,6 +514,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return -ENOMEM; fdb->is_local = fdb->is_static = 1; + fdb_add_hw(br, addr); fdb_notify(br, fdb, RTM_NEWNEIGH); return 0; } @@ -487,6 +536,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, { struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; + bool fdb_modified = false; /* some users want to always flood. */ if (hold_time(br) == 0) @@ -507,10 +557,15 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->dev->name); } else { /* fastpath: update of existing entry */ - fdb->dst = source; + if (unlikely(source != fdb->dst)) { + fdb->dst = source; + fdb_modified = true; + } fdb->updated = jiffies; if (unlikely(added_by_user)) fdb->added_by_user = 1; + if (unlikely(fdb_modified)) + fdb_notify(br, fdb, RTM_NEWNEIGH); } } else { spin_lock(&br->hash_lock); @@ -565,6 +620,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) goto nla_put_failure; + if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) + goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); ci.ndm_confirmed = 0; ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); @@ -572,7 +629,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -586,6 +643,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u32)) /* NDA_MASTER */ + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -618,6 +676,7 @@ errout: int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { struct net_bridge *br = netdev_priv(dev); @@ -633,6 +692,19 @@ int br_fdb_dump(struct sk_buff *skb, if (idx < cb->args[0]) goto skip; + if (filter_dev && + (!f->dst || f->dst->dev != filter_dev)) { + if (filter_dev != dev) + goto skip; + /* !f->dst is a speacial case for bridge + * It means the MAC belongs to the bridge + * Therefore need a little more filtering + * we only want to dump the !f->dst case + */ + if (f->dst) + goto skip; + } + if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -678,13 +750,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, } if (fdb_to_nud(fdb) != state) { - if (state & NUD_PERMANENT) - fdb->is_local = fdb->is_static = 1; - else if (state & NUD_NOARP) { + if (state & NUD_PERMANENT) { + fdb->is_local = 1; + if (!fdb->is_static) { + fdb->is_static = 1; + fdb_add_hw(br, addr); + } + } else if (state & NUD_NOARP) { fdb->is_local = 0; - fdb->is_static = 1; - } else - fdb->is_local = fdb->is_static = 0; + if (!fdb->is_static) { + fdb->is_static = 1; + fdb_add_hw(br, addr); + } + } else { + fdb->is_local = 0; + if (fdb->is_static) { + fdb->is_static = 0; + fdb_del_hw(br, addr); + } + } modified = true; } @@ -874,3 +958,59 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], out: return err; } + +int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p) +{ + struct net_bridge_fdb_entry *fdb, *tmp; + int i; + int err; + + ASSERT_RTNL(); + + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry(fdb, &br->hash[i], hlist) { + /* We only care for static entries */ + if (!fdb->is_static) + continue; + + err = dev_uc_add(p->dev, fdb->addr.addr); + if (err) + goto rollback; + } + } + return 0; + +rollback: + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry(tmp, &br->hash[i], hlist) { + /* If we reached the fdb that failed, we can stop */ + if (tmp == fdb) + break; + + /* We only care for static entries */ + if (!tmp->is_static) + continue; + + dev_uc_del(p->dev, tmp->addr.addr); + } + } + return err; +} + +void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) +{ + struct net_bridge_fdb_entry *fdb; + int i; + + ASSERT_RTNL(); + + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) { + /* We only care for static entries */ + if (!fdb->is_static) + continue; + + dev_uc_del(p->dev, fdb->addr.addr); + } + } +} diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 5262b8617eb9..078d336a1f37 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -85,6 +85,111 @@ void br_port_carrier_check(struct net_bridge_port *p) spin_unlock_bh(&br->lock); } +static void br_port_set_promisc(struct net_bridge_port *p) +{ + int err = 0; + + if (br_promisc_port(p)) + return; + + err = dev_set_promiscuity(p->dev, 1); + if (err) + return; + + br_fdb_unsync_static(p->br, p); + p->flags |= BR_PROMISC; +} + +static void br_port_clear_promisc(struct net_bridge_port *p) +{ + int err; + + /* Check if the port is already non-promisc or if it doesn't + * support UNICAST filtering. Without unicast filtering support + * we'll end up re-enabling promisc mode anyway, so just check for + * it here. + */ + if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT)) + return; + + /* Since we'll be clearing the promisc mode, program the port + * first so that we don't have interruption in traffic. + */ + err = br_fdb_sync_static(p->br, p); + if (err) + return; + + dev_set_promiscuity(p->dev, -1); + p->flags &= ~BR_PROMISC; +} + +/* When a port is added or removed or when certain port flags + * change, this function is called to automatically manage + * promiscuity setting of all the bridge ports. We are always called + * under RTNL so can skip using rcu primitives. + */ +void br_manage_promisc(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool set_all = false; + + /* If vlan filtering is disabled or bridge interface is placed + * into promiscuous mode, place all ports in promiscuous mode. + */ + if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br)) + set_all = true; + + list_for_each_entry(p, &br->port_list, list) { + if (set_all) { + br_port_set_promisc(p); + } else { + /* If the number of auto-ports is <= 1, then all other + * ports will have their output configuration + * statically specified through fdbs. Since ingress + * on the auto-port becomes forwarding/egress to other + * ports and egress configuration is statically known, + * we can say that ingress configuration of the + * auto-port is also statically known. + * This lets us disable promiscuous mode and write + * this config to hw. + */ + if (br->auto_cnt == 0 || + (br->auto_cnt == 1 && br_auto_port(p))) + br_port_clear_promisc(p); + else + br_port_set_promisc(p); + } + } +} + +static void nbp_update_port_count(struct net_bridge *br) +{ + struct net_bridge_port *p; + u32 cnt = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (br_auto_port(p)) + cnt++; + } + if (br->auto_cnt != cnt) { + br->auto_cnt = cnt; + br_manage_promisc(br); + } +} + +static void nbp_delete_promisc(struct net_bridge_port *p) +{ + /* If port is currently promiscuous, unset promiscuity. + * Otherwise, it is a static port so remove all addresses + * from it. + */ + dev_set_allmulti(p->dev, -1); + if (br_promisc_port(p)) + dev_set_promiscuity(p->dev, -1); + else + br_fdb_unsync_static(p->br, p); +} + static void release_nbp(struct kobject *kobj) { struct net_bridge_port *p @@ -133,7 +238,7 @@ static void del_nbp(struct net_bridge_port *p) sysfs_remove_link(br->ifobj, p->dev->name); - dev_set_promiscuity(dev, -1); + nbp_delete_promisc(p); spin_lock_bh(&br->lock); br_stp_disable_port(p); @@ -141,10 +246,11 @@ static void del_nbp(struct net_bridge_port *p) br_ifinfo_notify(RTM_DELLINK, p); + list_del_rcu(&p->list); + nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 1); - - list_del_rcu(&p->list); + nbp_update_port_count(br); dev->priv_flags &= ~IFF_BRIDGE_PORT; @@ -238,7 +344,7 @@ int br_add_bridge(struct net *net, const char *name) struct net_device *dev; int res; - dev = alloc_netdev(sizeof(struct net_bridge), name, + dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN, br_dev_setup); if (!dev) @@ -353,7 +459,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) call_netdevice_notifiers(NETDEV_JOIN, dev); - err = dev_set_promiscuity(dev, 1); + err = dev_set_allmulti(dev, 1); if (err) goto put_back; @@ -384,6 +490,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) list_add_rcu(&p->list, &br->port_list); + nbp_update_port_count(br); + netdev_update_features(br->dev); if (br->dev->needed_headroom < dev->needed_headroom) @@ -421,7 +529,7 @@ err2: kobject_put(&p->kobj); p = NULL; /* kobject_put frees */ err1: - dev_set_promiscuity(dev, -1); + dev_set_allmulti(dev, -1); put_back: dev_put(dev); kfree(p); @@ -455,3 +563,11 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) return 0; } + +void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) +{ + struct net_bridge *br = p->br; + + if (mask & BR_AUTO_MASK) + nbp_update_port_count(br); +} diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7985deaff52f..366c43649079 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -147,8 +147,8 @@ static int br_handle_local_finish(struct sk_buff *skb) struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; - br_vlan_get_tag(skb, &vid); - if (p->flags & BR_LEARNING) + /* check if vlan is allowed, to avoid spoofing */ + if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); return 0; /* process further */ } @@ -177,6 +177,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) p = br_port_get_rcu(skb->dev); if (unlikely(is_link_local_ether_addr(dest))) { + u16 fwd_mask = p->br->group_fwd_mask_required; + /* * See IEEE 802.1D Table 7-10 Reserved addresses * @@ -194,7 +196,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) case 0x00: /* Bridge Group Address */ /* If STP is turned off, then must forward to keep loop detection */ - if (p->br->stp_enabled == BR_NO_STP) + if (p->br->stp_enabled == BR_NO_STP || + fwd_mask & (1u << dest[5])) goto forward; break; @@ -203,7 +206,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) default: /* Allow selective forwarding for most other protocols */ - if (p->br->group_fwd_mask & (1u << dest[5])) + fwd_mask |= p->br->group_fwd_mask; + if (fwd_mask & (1u << dest[5])) goto forward; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index b7b1914dfa25..5df05269d17a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -418,13 +418,13 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { - if (timer_pending(&br->ip4_querier.timer)) + if (timer_pending(&br->ip4_other_query.timer)) return -EBUSY; ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) } else { - if (timer_pending(&br->ip6_querier.timer)) + if (timer_pending(&br->ip6_other_query.timer)) return -EBUSY; ip.u.ip6 = entry->addr.u.ip6; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 7b757b5dc773..b4845f4b2bb4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -11,6 +11,7 @@ */ #include <linux/err.h> +#include <linux/export.h> #include <linux/if_ether.h> #include <linux/igmp.h> #include <linux/jhash.h> @@ -35,7 +36,7 @@ #include "br_private.h" static void br_multicast_start_querier(struct net_bridge *br, - struct bridge_mcast_query *query); + struct bridge_mcast_own_query *query); unsigned int br_mdb_rehash_seq; static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) @@ -761,7 +762,7 @@ static void br_multicast_local_router_expired(unsigned long data) } static void br_multicast_querier_expired(struct net_bridge *br, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *query) { spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || br->multicast_disabled) @@ -777,7 +778,7 @@ static void br_ip4_multicast_querier_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_querier_expired(br, &br->ip4_query); + br_multicast_querier_expired(br, &br->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) @@ -785,10 +786,22 @@ static void br_ip6_multicast_querier_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_querier_expired(br, &br->ip6_query); + br_multicast_querier_expired(br, &br->ip6_own_query); } #endif +static void br_multicast_select_own_querier(struct net_bridge *br, + struct br_ip *ip, + struct sk_buff *skb) +{ + if (ip->proto == htons(ETH_P_IP)) + br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr; +#if IS_ENABLED(CONFIG_IPV6) + else + br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr; +#endif +} + static void __br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *ip) @@ -804,17 +817,19 @@ static void __br_multicast_send_query(struct net_bridge *br, skb->dev = port->dev; NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); - } else + } else { + br_multicast_select_own_querier(br, ip, skb); netif_rx(skb); + } } static void br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *own_query) { unsigned long time; struct br_ip br_group; - struct bridge_mcast_querier *querier = NULL; + struct bridge_mcast_other_query *other_query = NULL; if (!netif_running(br->dev) || br->multicast_disabled || !br->multicast_querier) @@ -822,31 +837,32 @@ static void br_multicast_send_query(struct net_bridge *br, memset(&br_group.u, 0, sizeof(br_group.u)); - if (port ? (query == &port->ip4_query) : - (query == &br->ip4_query)) { - querier = &br->ip4_querier; + if (port ? (own_query == &port->ip4_own_query) : + (own_query == &br->ip4_own_query)) { + other_query = &br->ip4_other_query; br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) } else { - querier = &br->ip6_querier; + other_query = &br->ip6_other_query; br_group.proto = htons(ETH_P_IPV6); #endif } - if (!querier || timer_pending(&querier->timer)) + if (!other_query || timer_pending(&other_query->timer)) return; __br_multicast_send_query(br, port, &br_group); time = jiffies; - time += query->startup_sent < br->multicast_startup_query_count ? + time += own_query->startup_sent < br->multicast_startup_query_count ? br->multicast_startup_query_interval : br->multicast_query_interval; - mod_timer(&query->timer, time); + mod_timer(&own_query->timer, time); } -static void br_multicast_port_query_expired(struct net_bridge_port *port, - struct bridge_mcast_query *query) +static void +br_multicast_port_query_expired(struct net_bridge_port *port, + struct bridge_mcast_own_query *query) { struct net_bridge *br = port->br; @@ -868,7 +884,7 @@ static void br_ip4_multicast_port_query_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; - br_multicast_port_query_expired(port, &port->ip4_query); + br_multicast_port_query_expired(port, &port->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) @@ -876,7 +892,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; - br_multicast_port_query_expired(port, &port->ip6_query); + br_multicast_port_query_expired(port, &port->ip6_own_query); } #endif @@ -886,11 +902,11 @@ void br_multicast_add_port(struct net_bridge_port *port) setup_timer(&port->multicast_router_timer, br_multicast_router_expired, (unsigned long)port); - setup_timer(&port->ip4_query.timer, br_ip4_multicast_port_query_expired, - (unsigned long)port); + setup_timer(&port->ip4_own_query.timer, + br_ip4_multicast_port_query_expired, (unsigned long)port); #if IS_ENABLED(CONFIG_IPV6) - setup_timer(&port->ip6_query.timer, br_ip6_multicast_port_query_expired, - (unsigned long)port); + setup_timer(&port->ip6_own_query.timer, + br_ip6_multicast_port_query_expired, (unsigned long)port); #endif } @@ -899,7 +915,7 @@ void br_multicast_del_port(struct net_bridge_port *port) del_timer_sync(&port->multicast_router_timer); } -static void br_multicast_enable(struct bridge_mcast_query *query) +static void br_multicast_enable(struct bridge_mcast_own_query *query) { query->startup_sent = 0; @@ -916,9 +932,9 @@ void br_multicast_enable_port(struct net_bridge_port *port) if (br->multicast_disabled || !netif_running(br->dev)) goto out; - br_multicast_enable(&port->ip4_query); + br_multicast_enable(&port->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - br_multicast_enable(&port->ip6_query); + br_multicast_enable(&port->ip6_own_query); #endif out: @@ -938,9 +954,9 @@ void br_multicast_disable_port(struct net_bridge_port *port) if (!hlist_unhashed(&port->rlist)) hlist_del_init_rcu(&port->rlist); del_timer(&port->multicast_router_timer); - del_timer(&port->ip4_query.timer); + del_timer(&port->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer(&port->ip6_query.timer); + del_timer(&port->ip6_own_query.timer); #endif spin_unlock(&br->multicast_lock); } @@ -1064,15 +1080,80 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif +static bool br_ip4_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + __be32 saddr) +{ + if (!timer_pending(&br->ip4_own_query.timer) && + !timer_pending(&br->ip4_other_query.timer)) + goto update; + + if (!br->ip4_querier.addr.u.ip4) + goto update; + + if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4)) + goto update; + + return false; + +update: + br->ip4_querier.addr.u.ip4 = saddr; + + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip4_querier.port, port); + + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static bool br_ip6_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + struct in6_addr *saddr) +{ + if (!timer_pending(&br->ip6_own_query.timer) && + !timer_pending(&br->ip6_other_query.timer)) + goto update; + + if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0) + goto update; + + return false; + +update: + br->ip6_querier.addr.u.ip6 = *saddr; + + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip6_querier.port, port); + + return true; +} +#endif + +static bool br_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *saddr) +{ + switch (saddr->proto) { + case htons(ETH_P_IP): + return br_ip4_multicast_select_querier(br, port, saddr->u.ip4); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6); +#endif + } + + return false; +} + static void -br_multicast_update_querier_timer(struct net_bridge *br, - struct bridge_mcast_querier *querier, - unsigned long max_delay) +br_multicast_update_query_timer(struct net_bridge *br, + struct bridge_mcast_other_query *query, + unsigned long max_delay) { - if (!timer_pending(&querier->timer)) - querier->delay_time = jiffies + max_delay; + if (!timer_pending(&query->timer)) + query->delay_time = jiffies + max_delay; - mod_timer(&querier->timer, jiffies + br->multicast_querier_interval); + mod_timer(&query->timer, jiffies + br->multicast_querier_interval); } /* @@ -1125,16 +1206,14 @@ timer: static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, - struct bridge_mcast_querier *querier, - int saddr, - bool is_general_query, + struct bridge_mcast_other_query *query, + struct br_ip *saddr, unsigned long max_delay) { - if (saddr && is_general_query) - br_multicast_update_querier_timer(br, querier, max_delay); - else if (timer_pending(&querier->timer)) + if (!br_multicast_select_querier(br, port, saddr)) return; + br_multicast_update_query_timer(br, query, max_delay); br_multicast_mark_router(br, port); } @@ -1149,6 +1228,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct igmpv3_query *ih3; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; __be32 group; @@ -1190,11 +1270,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, goto out; } - br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, - !group, max_delay); + if (!group) { + saddr.proto = htons(ETH_P_IP); + saddr.u.ip4 = iph->saddr; - if (!group) + br_multicast_query_received(br, port, &br->ip4_other_query, + &saddr, max_delay); goto out; + } mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) @@ -1234,6 +1317,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct mld2_query *mld2q; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; const struct in6_addr *group = NULL; @@ -1282,12 +1366,16 @@ static int br_ip6_multicast_query(struct net_bridge *br, goto out; } - br_multicast_query_received(br, port, &br->ip6_querier, - !ipv6_addr_any(&ip6h->saddr), - is_general_query, max_delay); + if (is_general_query) { + saddr.proto = htons(ETH_P_IPV6); + saddr.u.ip6 = ip6h->saddr; - if (!group) + br_multicast_query_received(br, port, &br->ip6_other_query, + &saddr, max_delay); goto out; + } else if (!group) { + goto out; + } mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) @@ -1315,11 +1403,12 @@ out: } #endif -static void br_multicast_leave_group(struct net_bridge *br, - struct net_bridge_port *port, - struct br_ip *group, - struct bridge_mcast_querier *querier, - struct bridge_mcast_query *query) +static void +br_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group, + struct bridge_mcast_other_query *other_query, + struct bridge_mcast_own_query *own_query) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -1330,7 +1419,7 @@ static void br_multicast_leave_group(struct net_bridge *br, spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED) || - timer_pending(&querier->timer)) + timer_pending(&other_query->timer)) goto out; mdb = mlock_dereference(br->mdb, br); @@ -1344,7 +1433,7 @@ static void br_multicast_leave_group(struct net_bridge *br, time = jiffies + br->multicast_last_member_count * br->multicast_last_member_interval; - mod_timer(&query->timer, time); + mod_timer(&own_query->timer, time); for (p = mlock_dereference(mp->ports, br); p != NULL; @@ -1425,17 +1514,19 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, __u16 vid) { struct br_ip br_group; - struct bridge_mcast_query *query = port ? &port->ip4_query : - &br->ip4_query; + struct bridge_mcast_own_query *own_query; if (ipv4_is_local_multicast(group)) return; + own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group, &br->ip4_querier, query); + br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query, + own_query); } #if IS_ENABLED(CONFIG_IPV6) @@ -1445,18 +1536,19 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, __u16 vid) { struct br_ip br_group; - struct bridge_mcast_query *query = port ? &port->ip6_query : - &br->ip6_query; - + struct bridge_mcast_own_query *own_query; if (ipv6_addr_is_ll_all_nodes(group)) return; + own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; - br_multicast_leave_group(br, port, &br_group, &br->ip6_querier, query); + br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query, + own_query); } #endif @@ -1723,12 +1815,14 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, } static void br_multicast_query_expired(struct net_bridge *br, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *query, + struct bridge_mcast_querier *querier) { spin_lock(&br->multicast_lock); if (query->startup_sent < br->multicast_startup_query_count) query->startup_sent++; + rcu_assign_pointer(querier, NULL); br_multicast_send_query(br, NULL, query); spin_unlock(&br->multicast_lock); } @@ -1737,7 +1831,7 @@ static void br_ip4_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_query_expired(br, &br->ip4_query); + br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); } #if IS_ENABLED(CONFIG_IPV6) @@ -1745,7 +1839,7 @@ static void br_ip6_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; - br_multicast_query_expired(br, &br->ip6_query); + br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); } #endif @@ -1767,28 +1861,30 @@ void br_multicast_init(struct net_bridge *br) br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; - br->ip4_querier.delay_time = 0; + br->ip4_other_query.delay_time = 0; + br->ip4_querier.port = NULL; #if IS_ENABLED(CONFIG_IPV6) - br->ip6_querier.delay_time = 0; + br->ip6_other_query.delay_time = 0; + br->ip6_querier.port = NULL; #endif spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); - setup_timer(&br->ip4_querier.timer, br_ip4_multicast_querier_expired, - (unsigned long)br); - setup_timer(&br->ip4_query.timer, br_ip4_multicast_query_expired, + setup_timer(&br->ip4_other_query.timer, + br_ip4_multicast_querier_expired, (unsigned long)br); + setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired, (unsigned long)br); #if IS_ENABLED(CONFIG_IPV6) - setup_timer(&br->ip6_querier.timer, br_ip6_multicast_querier_expired, - (unsigned long)br); - setup_timer(&br->ip6_query.timer, br_ip6_multicast_query_expired, + setup_timer(&br->ip6_other_query.timer, + br_ip6_multicast_querier_expired, (unsigned long)br); + setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired, (unsigned long)br); #endif } static void __br_multicast_open(struct net_bridge *br, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *query) { query->startup_sent = 0; @@ -1800,9 +1896,9 @@ static void __br_multicast_open(struct net_bridge *br, void br_multicast_open(struct net_bridge *br) { - __br_multicast_open(br, &br->ip4_query); + __br_multicast_open(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - __br_multicast_open(br, &br->ip6_query); + __br_multicast_open(br, &br->ip6_own_query); #endif } @@ -1815,11 +1911,11 @@ void br_multicast_stop(struct net_bridge *br) int i; del_timer_sync(&br->multicast_router_timer); - del_timer_sync(&br->ip4_querier.timer); - del_timer_sync(&br->ip4_query.timer); + del_timer_sync(&br->ip4_other_query.timer); + del_timer_sync(&br->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) - del_timer_sync(&br->ip6_querier.timer); - del_timer_sync(&br->ip6_query.timer); + del_timer_sync(&br->ip6_other_query.timer); + del_timer_sync(&br->ip6_own_query.timer); #endif spin_lock_bh(&br->multicast_lock); @@ -1923,7 +2019,7 @@ unlock: } static void br_multicast_start_querier(struct net_bridge *br, - struct bridge_mcast_query *query) + struct bridge_mcast_own_query *query) { struct net_bridge_port *port; @@ -1934,11 +2030,11 @@ static void br_multicast_start_querier(struct net_bridge *br, port->state == BR_STATE_BLOCKING) continue; - if (query == &br->ip4_query) - br_multicast_enable(&port->ip4_query); + if (query == &br->ip4_own_query) + br_multicast_enable(&port->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) else - br_multicast_enable(&port->ip6_query); + br_multicast_enable(&port->ip6_own_query); #endif } } @@ -1974,9 +2070,9 @@ rollback: goto rollback; } - br_multicast_start_querier(br, &br->ip4_query); + br_multicast_start_querier(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - br_multicast_start_querier(br, &br->ip6_query); + br_multicast_start_querier(br, &br->ip6_own_query); #endif unlock: @@ -2001,16 +2097,16 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) max_delay = br->multicast_query_response_interval; - if (!timer_pending(&br->ip4_querier.timer)) - br->ip4_querier.delay_time = jiffies + max_delay; + if (!timer_pending(&br->ip4_other_query.timer)) + br->ip4_other_query.delay_time = jiffies + max_delay; - br_multicast_start_querier(br, &br->ip4_query); + br_multicast_start_querier(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) - if (!timer_pending(&br->ip6_querier.timer)) - br->ip6_querier.delay_time = jiffies + max_delay; + if (!timer_pending(&br->ip6_other_query.timer)) + br->ip6_other_query.delay_time = jiffies + max_delay; - br_multicast_start_querier(br, &br->ip6_query); + br_multicast_start_querier(br, &br->ip6_own_query); #endif unlock: @@ -2061,3 +2157,146 @@ unlock: return err; } + +/** + * br_multicast_list_adjacent - Returns snooped multicast addresses + * @dev: The bridge port adjacent to which to retrieve addresses + * @br_ip_list: The list to store found, snooped multicast IP addresses in + * + * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast + * snooping feature on all bridge ports of dev's bridge device, excluding + * the addresses from dev itself. + * + * Returns the number of items added to br_ip_list. + * + * Notes: + * - br_ip_list needs to be initialized by caller + * - br_ip_list might contain duplicates in the end + * (needs to be taken care of by caller) + * - br_ip_list needs to be freed by caller + */ +int br_multicast_list_adjacent(struct net_device *dev, + struct list_head *br_ip_list) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct net_bridge_port_group *group; + struct br_ip_list *entry; + int count = 0; + + rcu_read_lock(); + if (!br_ip_list || !br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + list_for_each_entry_rcu(port, &br->port_list, list) { + if (!port->dev || port->dev == dev) + continue; + + hlist_for_each_entry_rcu(group, &port->mglist, mglist) { + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + goto unlock; + + entry->addr = group->addr; + list_add(&entry->list, br_ip_list); + count++; + } + } + +unlock: + rcu_read_unlock(); + return count; +} +EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); + +/** + * br_multicast_has_querier_anywhere - Checks for a querier on a bridge + * @dev: The bridge port providing the bridge on which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a valid querier exists anywhere on the bridged link layer. + * Otherwise returns false. + */ +bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct ethhdr eth; + bool ret = false; + + rcu_read_lock(); + if (!br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + memset(ð, 0, sizeof(eth)); + eth.h_proto = htons(proto); + + ret = br_multicast_querier_exists(br, ð); + +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere); + +/** + * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port + * @dev: The bridge port adjacent to which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a selected querier is behind one of the other ports of this + * bridge. Otherwise returns false. + */ +bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + bool ret = false; + + rcu_read_lock(); + if (!br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + switch (proto) { + case ETH_P_IP: + if (!timer_pending(&br->ip4_other_query.timer) || + rcu_dereference(br->ip4_querier.port) == port) + goto unlock; + break; +#if IS_ENABLED(CONFIG_IPV6) + case ETH_P_IPV6: + if (!timer_pending(&br->ip6_other_query.timer) || + rcu_dereference(br->ip6_querier.port) == port) + goto unlock; + break; +#endif + default: + goto unlock; + } + + ret = true; +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 80e1b0f60a30..a615264cf01a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) return br; - vlan = __vlan_find_dev_deep(br, skb->vlan_proto, + vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, vlan_tx_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; @@ -859,12 +859,12 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_STOLEN; } -#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; - if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && + if (skb->protocol == htons(ETH_P_IP) && skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && !skb_is_gso(skb)) { if (br_parse_ip_options(skb)) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e74b6d530cb6..cb5fcf62f663 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -208,7 +208,6 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, int err = 0; struct net_bridge_port *port = br_port_get_rtnl(dev); - /* not a bridge port and */ if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; @@ -328,6 +327,7 @@ static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) { int err; + unsigned long old_flags = p->flags; br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); @@ -353,6 +353,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) if (err) return err; } + + br_port_flags_change(p, old_flags ^ p->flags); return 0; } @@ -445,6 +447,20 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + return register_netdevice(dev); +} + static size_t br_get_link_af_size(const struct net_device *dev) { struct net_port_vlans *pv; @@ -473,6 +489,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = { .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, .validate = br_validate, + .newlink = br_dev_newlink, .dellink = br_dev_delete, }; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c deleted file mode 100644 index 2998dd1769a0..000000000000 --- a/net/bridge/br_notify.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Device event handling - * Linux ethernet bridge - * - * Authors: - * Lennert Buytenhek <buytenh@gnu.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/rtnetlink.h> -#include <net/net_namespace.h> - -#include "br_private.h" - -static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr); - -struct notifier_block br_device_notifier = { - .notifier_call = br_device_event -}; - -/* - * Handle changes in state of network devices enslaved to a bridge. - * - * Note: don't care about up/down if bridge itself is down, because - * port state is checked when bridge is brought up. - */ -static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net_bridge_port *p; - struct net_bridge *br; - bool changed_addr; - int err; - - /* register of bridge completed, add sysfs entries */ - if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { - br_sysfs_addbr(dev); - return NOTIFY_DONE; - } - - /* not a port of a bridge */ - p = br_port_get_rtnl(dev); - if (!p) - return NOTIFY_DONE; - - br = p->br; - - switch (event) { - case NETDEV_CHANGEMTU: - dev_set_mtu(br->dev, br_min_mtu(br)); - break; - - case NETDEV_CHANGEADDR: - spin_lock_bh(&br->lock); - br_fdb_changeaddr(p, dev->dev_addr); - changed_addr = br_stp_recalculate_bridge_id(br); - spin_unlock_bh(&br->lock); - - if (changed_addr) - call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); - - break; - - case NETDEV_CHANGE: - br_port_carrier_check(p); - break; - - case NETDEV_FEAT_CHANGE: - netdev_update_features(br->dev); - break; - - case NETDEV_DOWN: - spin_lock_bh(&br->lock); - if (br->dev->flags & IFF_UP) - br_stp_disable_port(p); - spin_unlock_bh(&br->lock); - break; - - case NETDEV_UP: - if (netif_running(br->dev) && netif_oper_up(dev)) { - spin_lock_bh(&br->lock); - br_stp_enable_port(p); - spin_unlock_bh(&br->lock); - } - break; - - case NETDEV_UNREGISTER: - br_del_if(br, dev); - break; - - case NETDEV_CHANGENAME: - err = br_sysfs_renameif(p); - if (err) - return notifier_from_errno(err); - break; - - case NETDEV_PRE_TYPE_CHANGE: - /* Forbid underlaying device to change its type. */ - return NOTIFY_BAD; - - case NETDEV_RESEND_IGMP: - /* Propagate to master device */ - call_netdevice_notifiers(event, br->dev); - break; - } - - /* Events that may cause spanning tree to refresh */ - if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || - event == NETDEV_CHANGE || event == NETDEV_DOWN) - br_ifinfo_notify(RTM_NEWLINK, p); - - return NOTIFY_DONE; -} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 06811d79f89f..62a7fa2e3569 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -35,6 +35,8 @@ #define BR_GROUPFWD_DEFAULT 0 /* Don't allow forwarding control protocols like STP and LLDP */ #define BR_GROUPFWD_RESTRICTED 0x4007u +/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ +#define BR_GROUPFWD_8021AD 0xB801u /* Path to usermode spanning tree program */ #define BR_STP_PROG "/sbin/bridge-stp" @@ -54,30 +56,24 @@ struct mac_addr unsigned char addr[ETH_ALEN]; }; -struct br_ip -{ - union { - __be32 ip4; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr ip6; -#endif - } u; - __be16 proto; - __u16 vid; -}; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING /* our own querier */ -struct bridge_mcast_query { +struct bridge_mcast_own_query { struct timer_list timer; u32 startup_sent; }; /* other querier */ -struct bridge_mcast_querier { +struct bridge_mcast_other_query { struct timer_list timer; unsigned long delay_time; }; + +/* selected querier */ +struct bridge_mcast_querier { + struct br_ip addr; + struct net_bridge_port __rcu *port; +}; #endif struct net_port_vlans { @@ -174,11 +170,13 @@ struct net_bridge_port #define BR_ADMIN_COST 0x00000010 #define BR_LEARNING 0x00000020 #define BR_FLOOD 0x00000040 +#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING) +#define BR_PROMISC 0x00000080 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - struct bridge_mcast_query ip4_query; + struct bridge_mcast_own_query ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) - struct bridge_mcast_query ip6_query; + struct bridge_mcast_own_query ip6_own_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ unsigned char multicast_router; struct timer_list multicast_router_timer; @@ -198,6 +196,9 @@ struct net_bridge_port #endif }; +#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) +#define br_promisc_port(p) ((p)->flags & BR_PROMISC) + #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) @@ -227,6 +228,7 @@ struct net_bridge bool nf_call_arptables; #endif u16 group_fwd_mask; + u16 group_fwd_mask_required; /* STP */ bridge_id designated_root; @@ -241,6 +243,7 @@ struct net_bridge unsigned long bridge_forward_delay; u8 group_addr[ETH_ALEN]; + bool group_addr_set; u16 root_port; enum { @@ -277,11 +280,13 @@ struct net_bridge struct hlist_head router_list; struct timer_list multicast_router_timer; + struct bridge_mcast_other_query ip4_other_query; + struct bridge_mcast_own_query ip4_own_query; struct bridge_mcast_querier ip4_querier; - struct bridge_mcast_query ip4_query; #if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_other_query ip6_other_query; + struct bridge_mcast_own_query ip6_own_query; struct bridge_mcast_querier ip6_querier; - struct bridge_mcast_query ip6_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif @@ -290,8 +295,10 @@ struct net_bridge struct timer_list topology_change_timer; struct timer_list gc_timer; struct kobject *ifobj; + u32 auto_cnt; #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_enabled; + __be16 vlan_proto; struct net_port_vlans __rcu *vlan_info; #endif }; @@ -327,8 +334,6 @@ struct br_input_skb_cb { #define br_debug(br, format, args...) \ pr_debug("%s: " format, (br)->dev->name, ##args) -extern struct notifier_block br_device_notifier; - /* called under bridge lock */ static inline int br_is_root_bridge(const struct net_bridge *br) { @@ -394,7 +399,9 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, int idx); + struct net_device *dev, struct net_device *fdev, int idx); +int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); +void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); @@ -415,6 +422,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev); int br_min_mtu(const struct net_bridge *br); netdev_features_t br_features_recompute(struct net_bridge *br, netdev_features_t features); +void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); +void br_manage_promisc(struct net_bridge *br); /* br_input.c */ int br_handle_frame_finish(struct sk_buff *skb); @@ -485,7 +494,7 @@ static inline bool br_multicast_is_router(struct net_bridge *br) static inline bool __br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_querier *querier) + struct bridge_mcast_other_query *querier) { return time_is_before_jiffies(querier->delay_time) && (br->multicast_querier || timer_pending(&querier->timer)); @@ -496,10 +505,10 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, &br->ip4_querier); + return __br_multicast_querier_exists(br, &br->ip4_other_query); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, &br->ip6_querier); + return __br_multicast_querier_exists(br, &br->ip6_other_query); #endif default: return false; @@ -581,6 +590,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, struct sk_buff *skb, u16 *vid); bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v, const struct sk_buff *skb); +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb); @@ -588,7 +598,10 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); bool br_vlan_find(struct net_bridge *br, u16 vid); +void br_recalculate_fwd_mask(struct net_bridge *br); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int br_vlan_set_proto(struct net_bridge *br, unsigned long val); +void br_vlan_init(struct net_bridge *br); int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); @@ -632,6 +645,10 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v) return v->pvid ?: VLAN_N_VID; } +static inline int br_vlan_enabled(struct net_bridge *br) +{ + return br->vlan_enabled; +} #else static inline bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, @@ -648,6 +665,12 @@ static inline bool br_allowed_egress(struct net_bridge *br, return true; } +static inline bool br_should_learn(struct net_bridge_port *p, + struct sk_buff *skb, u16 *vid) +{ + return true; +} + static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb) @@ -674,6 +697,14 @@ static inline bool br_vlan_find(struct net_bridge *br, u16 vid) return false; } +static inline void br_recalculate_fwd_mask(struct net_bridge *br) +{ +} + +static inline void br_vlan_init(struct net_bridge *br) +{ +} + static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { return -EOPNOTSUPP; @@ -712,6 +743,11 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v) { return VLAN_N_VID; /* Returns invalid vid */ } + +static inline int br_vlan_enabled(struct net_bridge *br) +{ + return 0; +} #endif /* br_netfilter.c */ diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8dac65552f19..c9e2572b15f4 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -312,10 +312,19 @@ static ssize_t group_addr_store(struct device *d, new_addr[5] == 3) /* 802.1X PAE address */ return -EINVAL; + if (!rtnl_trylock()) + return restart_syscall(); + spin_lock_bh(&br->lock); for (i = 0; i < 6; i++) br->group_addr[i] = new_addr[i]; spin_unlock_bh(&br->lock); + + br->group_addr_set = true; + br_recalculate_fwd_mask(br); + + rtnl_unlock(); + return len; } @@ -700,6 +709,22 @@ static ssize_t vlan_filtering_store(struct device *d, return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); } static DEVICE_ATTR_RW(vlan_filtering); + +static ssize_t vlan_protocol_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto)); +} + +static ssize_t vlan_protocol_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_proto); +} +static DEVICE_ATTR_RW(vlan_protocol); #endif static struct attribute *bridge_attrs[] = { @@ -745,6 +770,7 @@ static struct attribute *bridge_attrs[] = { #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING &dev_attr_vlan_filtering.attr, + &dev_attr_vlan_protocol.attr, #endif NULL }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index dd595bd7fa82..e561cd59b8a6 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -41,20 +41,30 @@ static ssize_t show_##_name(struct net_bridge_port *p, char *buf) \ } \ static int store_##_name(struct net_bridge_port *p, unsigned long v) \ { \ - unsigned long flags = p->flags; \ - if (v) \ - flags |= _mask; \ - else \ - flags &= ~_mask; \ - if (flags != p->flags) { \ - p->flags = flags; \ - br_ifinfo_notify(RTM_NEWLINK, p); \ - } \ - return 0; \ + return store_flag(p, v, _mask); \ } \ static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \ show_##_name, store_##_name) +static int store_flag(struct net_bridge_port *p, unsigned long v, + unsigned long mask) +{ + unsigned long flags; + + flags = p->flags; + + if (v) + flags |= mask; + else + flags &= ~mask; + + if (flags != p->flags) { + p->flags = flags; + br_port_flags_change(p, mask); + br_ifinfo_notify(RTM_NEWLINK, p); + } + return 0; +} static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 4a3716102789..febb0f87fa37 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -55,12 +55,10 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) if (p) { /* Add VLAN to the device filter if it is supported. - * Stricly speaking, this is not necessary now, since - * devices are made promiscuous by the bridge, but if - * that ever changes this code will allow tagged - * traffic to enter the bridge. + * This ensures tagged traffic enters the bridge when + * promiscuous mode is disabled by br_manage_promisc(). */ - err = vlan_vid_add(dev, htons(ETH_P_8021Q), vid); + err = vlan_vid_add(dev, br->vlan_proto, vid); if (err) return err; } @@ -80,7 +78,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) out_filt: if (p) - vlan_vid_del(dev, htons(ETH_P_8021Q), vid); + vlan_vid_del(dev, br->vlan_proto, vid); return err; } @@ -92,8 +90,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) __vlan_delete_pvid(v, vid); clear_bit(vid, v->untagged_bitmap); - if (v->port_idx) - vlan_vid_del(v->parent.port->dev, htons(ETH_P_8021Q), vid); + if (v->port_idx) { + struct net_bridge_port *p = v->parent.port; + vlan_vid_del(p->dev, p->br->vlan_proto, vid); + } clear_bit(vid, v->vlan_bitmap); v->num_vlans--; @@ -158,7 +158,8 @@ out: bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, struct sk_buff *skb, u16 *vid) { - int err; + bool tagged; + __be16 proto; /* If VLAN filtering is disabled on the bridge, all packets are * permitted. @@ -172,19 +173,41 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) goto drop; + proto = br->vlan_proto; + /* If vlan tx offload is disabled on bridge device and frame was * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. */ if (unlikely(!vlan_tx_tag_present(skb) && - (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)))) { + skb->protocol == proto)) { skb = vlan_untag(skb); if (unlikely(!skb)) return false; } - err = br_vlan_get_tag(skb, vid); + if (!br_vlan_get_tag(skb, vid)) { + /* Tagged frame */ + if (skb->vlan_proto != proto) { + /* Protocol-mismatch, empty out vlan_tci for new tag */ + skb_push(skb, ETH_HLEN); + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (unlikely(!skb)) + return false; + + skb_pull(skb, ETH_HLEN); + skb_reset_mac_len(skb); + *vid = 0; + tagged = false; + } else { + tagged = true; + } + } else { + /* Untagged frame */ + tagged = false; + } + if (!*vid) { u16 pvid = br_get_pvid(v); @@ -199,9 +222,9 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * ingress frame is considered to belong to this vlan. */ *vid = pvid; - if (likely(err)) + if (likely(!tagged)) /* Untagged Frame. */ - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + __vlan_hwaccel_put_tag(skb, proto, pvid); else /* Priority-tagged Frame. * At this point, We know that skb->vlan_tci had @@ -241,6 +264,36 @@ bool br_allowed_egress(struct net_bridge *br, return false; } +/* Called under RCU */ +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) +{ + struct net_bridge *br = p->br; + struct net_port_vlans *v; + + if (!br->vlan_enabled) + return true; + + v = rcu_dereference(p->vlan_info); + if (!v) + return false; + + if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto) + *vid = 0; + + if (!*vid) { + *vid = br_get_pvid(v); + if (*vid == VLAN_N_VID) + return false; + + return true; + } + + if (test_bit(*vid, v->vlan_bitmap)) + return true; + + return false; +} + /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. */ @@ -323,6 +376,33 @@ out: return found; } +/* Must be protected by RTNL. */ +static void recalculate_group_addr(struct net_bridge *br) +{ + if (br->group_addr_set) + return; + + spin_lock_bh(&br->lock); + if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) { + /* Bridge Group Address */ + br->group_addr[5] = 0x00; + } else { /* vlan_enabled && ETH_P_8021AD */ + /* Provider Bridge Group Address */ + br->group_addr[5] = 0x08; + } + spin_unlock_bh(&br->lock); +} + +/* Must be protected by RTNL. */ +void br_recalculate_fwd_mask(struct net_bridge *br) +{ + if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) + br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; + else /* vlan_enabled && ETH_P_8021AD */ + br->group_fwd_mask_required = BR_GROUPFWD_8021AD & + ~(1u << br->group_addr[5]); +} + int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { if (!rtnl_trylock()) @@ -332,12 +412,88 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) goto unlock; br->vlan_enabled = val; + br_manage_promisc(br); + recalculate_group_addr(br); + br_recalculate_fwd_mask(br); unlock: rtnl_unlock(); return 0; } +int br_vlan_set_proto(struct net_bridge *br, unsigned long val) +{ + int err = 0; + struct net_bridge_port *p; + struct net_port_vlans *pv; + __be16 proto, oldproto; + u16 vid, errvid; + + if (val != ETH_P_8021Q && val != ETH_P_8021AD) + return -EPROTONOSUPPORT; + + if (!rtnl_trylock()) + return restart_syscall(); + + proto = htons(val); + if (br->vlan_proto == proto) + goto unlock; + + /* Add VLANs for the new proto to the device filter. */ + list_for_each_entry(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + err = vlan_vid_add(p->dev, proto, vid); + if (err) + goto err_filt; + } + } + + oldproto = br->vlan_proto; + br->vlan_proto = proto; + + recalculate_group_addr(br); + br_recalculate_fwd_mask(br); + + /* Delete VLANs for the old proto from the device filter. */ + list_for_each_entry(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + vlan_vid_del(p->dev, oldproto, vid); + } + +unlock: + rtnl_unlock(); + return err; + +err_filt: + errvid = vid; + for_each_set_bit(vid, pv->vlan_bitmap, errvid) + vlan_vid_del(p->dev, proto, vid); + + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + vlan_vid_del(p->dev, proto, vid); + } + + goto unlock; +} + +void br_vlan_init(struct net_bridge *br) +{ + br->vlan_proto = htons(ETH_P_8021Q); +} + /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. */ @@ -404,7 +560,7 @@ void nbp_vlan_flush(struct net_bridge_port *port) return; for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - vlan_vid_del(port->dev, htons(ETH_P_8021Q), vid); + vlan_vid_del(port->dev, port->br->vlan_proto, vid); __vlan_flush(pv); } diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 5ca74a0e595f..9cebf47ac840 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -2,14 +2,32 @@ # Bridge netfilter configuration # # -config NF_TABLES_BRIDGE - depends on NF_TABLES +menuconfig NF_TABLES_BRIDGE + depends on BRIDGE && NETFILTER && NF_TABLES tristate "Ethernet Bridge nf_tables support" +if NF_TABLES_BRIDGE + +config NFT_BRIDGE_META + tristate "Netfilter nf_table bridge meta support" + depends on NFT_META + help + Add support for bridge dedicated meta key. + +config NFT_BRIDGE_REJECT + tristate "Netfilter nf_tables bridge reject support" + depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6 + help + Add support to reject packets. + +config NF_LOG_BRIDGE + tristate "Bridge packet logging" + +endif # NF_TABLES_BRIDGE + menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" - depends on BRIDGE && NETFILTER - select NETFILTER_XTABLES + depends on BRIDGE && NETFILTER && NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification framework. Say 'Y' or 'M' here if you want to do Ethernet @@ -193,22 +211,6 @@ config BRIDGE_EBT_LOG To compile it as a module, choose M here. If unsure, say N. -config BRIDGE_EBT_ULOG - tristate "ebt: ulog support (OBSOLETE)" - help - This option enables the old bridge-specific "ebt_ulog" implementation - which has been obsoleted by the new "nfnetlink_log" code (see - CONFIG_NETFILTER_NETLINK_LOG). - - This option adds the ulog watcher, that you can use in any rule - in any ebtables table. The packet is passed to a userspace - logging daemon using netlink multicast sockets. This differs - from the log watcher in the sense that the complete packet is - sent to userspace instead of a descriptive text and that - netlink multicast sockets are used instead of the syslog. - - To compile it as a module, choose M here. If unsure, say N. - config BRIDGE_EBT_NFLOG tristate "ebt: nflog support" help diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index ea7629f58b3d..be4d0cea78ce 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -3,6 +3,11 @@ # obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o +obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o +obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o + +# packet logging +obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o @@ -32,5 +37,4 @@ obj-$(CONFIG_BRIDGE_EBT_SNAT) += ebt_snat.o # watchers obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_log.o -obj-$(CONFIG_BRIDGE_EBT_ULOG) += ebt_ulog.o obj-$(CONFIG_BRIDGE_EBT_NFLOG) += ebt_nflog.o diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 5322a36867a3..17f2e4bc2a29 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -186,6 +186,10 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.log.level = info->loglevel; li.u.log.logflags = info->bitmask; + /* Remember that we have to use ebt_log_packet() not to break backward + * compatibility. We cannot use the default bridge packet logger via + * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo + */ if (info->bitmask & EBT_LOG_NFLOG) nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, par->out, &li, "%s", info->prefix); @@ -205,54 +209,13 @@ static struct xt_target ebt_log_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_logger ebt_log_logger __read_mostly = { - .name = "ebt_log", - .logfn = &ebt_log_packet, - .me = THIS_MODULE, -}; - -static int __net_init ebt_log_net_init(struct net *net) -{ - nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger); - return 0; -} - -static void __net_exit ebt_log_net_fini(struct net *net) -{ - nf_log_unset(net, &ebt_log_logger); -} - -static struct pernet_operations ebt_log_net_ops = { - .init = ebt_log_net_init, - .exit = ebt_log_net_fini, -}; - static int __init ebt_log_init(void) { - int ret; - - ret = register_pernet_subsys(&ebt_log_net_ops); - if (ret < 0) - goto err_pernet; - - ret = xt_register_target(&ebt_log_tg_reg); - if (ret < 0) - goto err_target; - - nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); - - return ret; - -err_target: - unregister_pernet_subsys(&ebt_log_net_ops); -err_pernet: - return ret; + return xt_register_target(&ebt_log_tg_reg); } static void __exit ebt_log_fini(void) { - unregister_pernet_subsys(&ebt_log_net_ops); - nf_log_unregister(&ebt_log_logger); xt_unregister_target(&ebt_log_tg_reg); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c deleted file mode 100644 index 7c470c371e14..000000000000 --- a/net/bridge/netfilter/ebt_ulog.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * netfilter module for userspace bridged Ethernet frames logging daemons - * - * Authors: - * Bart De Schuymer <bdschuym@pandora.be> - * Harald Welte <laforge@netfilter.org> - * - * November, 2004 - * - * Based on ipt_ULOG.c, which is - * (C) 2000-2002 by Harald Welte <laforge@netfilter.org> - * - * This module accepts two parameters: - * - * nlbufsiz: - * The parameter specifies how big the buffer for each netlink multicast - * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will - * get accumulated in the kernel until they are sent to userspace. It is - * NOT possible to allocate more than 128kB, and it is strongly discouraged, - * because atomically allocating 128kB inside the network rx softirq is not - * reliable. Please also keep in mind that this buffer size is allocated for - * each nlgroup you are using, so the total kernel memory usage increases - * by that factor. - * - * flushtimeout: - * Specify, after how many hundredths of a second the queue should be - * flushed even if it is not full yet. - * - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/socket.h> -#include <linux/skbuff.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <net/netlink.h> -#include <linux/netdevice.h> -#include <linux/netfilter/x_tables.h> -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_ulog.h> -#include <net/netfilter/nf_log.h> -#include <net/netns/generic.h> -#include <net/sock.h> -#include "../br_private.h" - -static unsigned int nlbufsiz = NLMSG_GOODSIZE; -module_param(nlbufsiz, uint, 0600); -MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) " - "(defaults to 4096)"); - -static unsigned int flushtimeout = 10; -module_param(flushtimeout, uint, 0600); -MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) " - "(defaults to 10)"); - -typedef struct { - unsigned int qlen; /* number of nlmsgs' in the skb */ - struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ - struct sk_buff *skb; /* the pre-allocated skb */ - struct timer_list timer; /* the timer function */ - spinlock_t lock; /* the per-queue lock */ -} ebt_ulog_buff_t; - -static int ebt_ulog_net_id __read_mostly; -struct ebt_ulog_net { - unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS]; - ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; - struct sock *ebtulognl; -}; - -static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net) -{ - return net_generic(net, ebt_ulog_net_id); -} - -/* send one ulog_buff_t to userspace */ -static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup) -{ - ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup]; - - del_timer(&ub->timer); - - if (!ub->skb) - return; - - /* last nlmsg needs NLMSG_DONE */ - if (ub->qlen > 1) - ub->lastnlh->nlmsg_type = NLMSG_DONE; - - NETLINK_CB(ub->skb).dst_group = nlgroup + 1; - netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); - - ub->qlen = 0; - ub->skb = NULL; -} - -/* timer function to flush queue in flushtimeout time */ -static void ulog_timer(unsigned long data) -{ - struct ebt_ulog_net *ebt = container_of((void *)data, - struct ebt_ulog_net, - nlgroup[*(unsigned int *)data]); - - ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data]; - spin_lock_bh(&ub->lock); - if (ub->skb) - ulog_send(ebt, *(unsigned int *)data); - spin_unlock_bh(&ub->lock); -} - -static struct sk_buff *ulog_alloc_skb(unsigned int size) -{ - struct sk_buff *skb; - unsigned int n; - - n = max(size, nlbufsiz); - skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); - if (!skb) { - if (n > size) { - /* try to allocate only as much as we need for - * current packet */ - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - pr_debug("cannot even allocate buffer of size %ub\n", - size); - } - } - - return skb; -} - -static void ebt_ulog_packet(struct net *net, unsigned int hooknr, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct ebt_ulog_info *uloginfo, - const char *prefix) -{ - ebt_ulog_packet_msg_t *pm; - size_t size, copy_len; - struct nlmsghdr *nlh; - struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - unsigned int group = uloginfo->nlgroup; - ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group]; - spinlock_t *lock = &ub->lock; - ktime_t kt; - - if ((uloginfo->cprange == 0) || - (uloginfo->cprange > skb->len + ETH_HLEN)) - copy_len = skb->len + ETH_HLEN; - else - copy_len = uloginfo->cprange; - - size = nlmsg_total_size(sizeof(*pm) + copy_len); - if (size > nlbufsiz) { - pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); - return; - } - - spin_lock_bh(lock); - - if (!ub->skb) { - if (!(ub->skb = ulog_alloc_skb(size))) - goto unlock; - } else if (size > skb_tailroom(ub->skb)) { - ulog_send(ebt, group); - - if (!(ub->skb = ulog_alloc_skb(size))) - goto unlock; - } - - nlh = nlmsg_put(ub->skb, 0, ub->qlen, 0, - size - NLMSG_ALIGN(sizeof(*nlh)), 0); - if (!nlh) { - kfree_skb(ub->skb); - ub->skb = NULL; - goto unlock; - } - ub->qlen++; - - pm = nlmsg_data(nlh); - memset(pm, 0, sizeof(*pm)); - - /* Fill in the ulog data */ - pm->version = EBT_ULOG_VERSION; - kt = ktime_get_real(); - pm->stamp = ktime_to_timeval(kt); - if (ub->qlen == 1) - ub->skb->tstamp = kt; - pm->data_len = copy_len; - pm->mark = skb->mark; - pm->hook = hooknr; - if (uloginfo->prefix != NULL) - strcpy(pm->prefix, uloginfo->prefix); - - if (in) { - strcpy(pm->physindev, in->name); - /* If in isn't a bridge, then physindev==indev */ - if (br_port_exists(in)) - /* rcu_read_lock()ed by nf_hook_slow */ - strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); - else - strcpy(pm->indev, in->name); - } - - if (out) { - /* If out exists, then out is a bridge port */ - strcpy(pm->physoutdev, out->name); - /* rcu_read_lock()ed by nf_hook_slow */ - strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); - } - - if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) - BUG(); - - if (ub->qlen > 1) - ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; - - ub->lastnlh = nlh; - - if (ub->qlen >= uloginfo->qthreshold) - ulog_send(ebt, group); - else if (!timer_pending(&ub->timer)) { - ub->timer.expires = jiffies + flushtimeout * HZ / 100; - add_timer(&ub->timer); - } - -unlock: - spin_unlock_bh(lock); -} - -/* this function is registered with the netfilter core */ -static void ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, - const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct nf_loginfo *li, - const char *prefix) -{ - struct ebt_ulog_info loginfo; - - if (!li || li->type != NF_LOG_TYPE_ULOG) { - loginfo.nlgroup = EBT_ULOG_DEFAULT_NLGROUP; - loginfo.cprange = 0; - loginfo.qthreshold = EBT_ULOG_DEFAULT_QTHRESHOLD; - loginfo.prefix[0] = '\0'; - } else { - loginfo.nlgroup = li->u.ulog.group; - loginfo.cprange = li->u.ulog.copy_len; - loginfo.qthreshold = li->u.ulog.qthreshold; - strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); - } - - ebt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); -} - -static unsigned int -ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - - ebt_ulog_packet(net, par->hooknum, skb, par->in, par->out, - par->targinfo, NULL); - return EBT_CONTINUE; -} - -static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) -{ - struct ebt_ulog_info *uloginfo = par->targinfo; - - if (!par->net->xt.ebt_ulog_warn_deprecated) { - pr_info("ebt_ulog is deprecated and it will be removed soon, " - "use ebt_nflog instead\n"); - par->net->xt.ebt_ulog_warn_deprecated = true; - } - - if (uloginfo->nlgroup > 31) - return -EINVAL; - - uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; - - if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN) - uloginfo->qthreshold = EBT_ULOG_MAX_QLEN; - - return 0; -} - -static struct xt_target ebt_ulog_tg_reg __read_mostly = { - .name = "ulog", - .revision = 0, - .family = NFPROTO_BRIDGE, - .target = ebt_ulog_tg, - .checkentry = ebt_ulog_tg_check, - .targetsize = sizeof(struct ebt_ulog_info), - .me = THIS_MODULE, -}; - -static struct nf_logger ebt_ulog_logger __read_mostly = { - .name = "ebt_ulog", - .logfn = &ebt_log_packet, - .me = THIS_MODULE, -}; - -static int __net_init ebt_ulog_net_init(struct net *net) -{ - int i; - struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - - struct netlink_kernel_cfg cfg = { - .groups = EBT_ULOG_MAXNLGROUPS, - }; - - /* initialize ulog_buffers */ - for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - ebt->nlgroup[i] = i; - setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer, - (unsigned long)&ebt->nlgroup[i]); - spin_lock_init(&ebt->ulog_buffers[i].lock); - } - - ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); - if (!ebt->ebtulognl) - return -ENOMEM; - - nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger); - return 0; -} - -static void __net_exit ebt_ulog_net_fini(struct net *net) -{ - int i; - struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - - nf_log_unset(net, &ebt_ulog_logger); - for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i]; - del_timer(&ub->timer); - - if (ub->skb) { - kfree_skb(ub->skb); - ub->skb = NULL; - } - } - netlink_kernel_release(ebt->ebtulognl); -} - -static struct pernet_operations ebt_ulog_net_ops = { - .init = ebt_ulog_net_init, - .exit = ebt_ulog_net_fini, - .id = &ebt_ulog_net_id, - .size = sizeof(struct ebt_ulog_net), -}; - -static int __init ebt_ulog_init(void) -{ - int ret; - - if (nlbufsiz >= 128*1024) { - pr_warn("Netlink buffer has to be <= 128kB," - "please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; - } - - ret = register_pernet_subsys(&ebt_ulog_net_ops); - if (ret) - goto out_pernet; - - ret = xt_register_target(&ebt_ulog_tg_reg); - if (ret) - goto out_target; - - nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); - - return 0; - -out_target: - unregister_pernet_subsys(&ebt_ulog_net_ops); -out_pernet: - return ret; -} - -static void __exit ebt_ulog_fini(void) -{ - nf_log_unregister(&ebt_ulog_logger); - xt_unregister_target(&ebt_ulog_tg_reg); - unregister_pernet_subsys(&ebt_ulog_net_ops); -} - -module_init(ebt_ulog_init); -module_exit(ebt_ulog_fini); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); -MODULE_DESCRIPTION("Ebtables: Packet logging to netlink using ULOG"); diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c new file mode 100644 index 000000000000..5d9953a90929 --- /dev/null +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -0,0 +1,96 @@ +/* + * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_bridge.h> +#include <linux/ip.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_log.h> + +static void nf_log_bridge_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_IPV6): + nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + } +} + +static struct nf_logger nf_bridge_logger __read_mostly = { + .name = "nf_log_bridge", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_bridge_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_bridge_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); + return 0; +} + +static void __net_exit nf_log_bridge_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_bridge_logger); +} + +static struct pernet_operations nf_log_bridge_net_ops = { + .init = nf_log_bridge_net_init, + .exit = nf_log_bridge_net_exit, +}; + +static int __init nf_log_bridge_init(void) +{ + int ret; + + /* Request to load the real packet loggers. */ + nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); + + ret = register_pernet_subsys(&nf_log_bridge_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger); + return 0; +} + +static void __exit nf_log_bridge_exit(void) +{ + unregister_pernet_subsys(&nf_log_bridge_net_ops); + nf_log_unregister(&nf_bridge_logger); +} + +module_init(nf_log_bridge_init); +module_exit(nf_log_bridge_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter bridge packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0); diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c new file mode 100644 index 000000000000..4f02109d708f --- /dev/null +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2014 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_meta.h> + +#include "../br_private.h" + +static void nft_meta_bridge_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + const struct net_bridge_port *p; + + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + if (in == NULL || (p = br_port_get_rcu(in)) == NULL) + goto err; + break; + case NFT_META_BRI_OIFNAME: + if (out == NULL || (p = br_port_get_rcu(out)) == NULL) + goto err; + break; + default: + goto out; + } + + strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data)); + return; +out: + return nft_meta_get_eval(expr, data, pkt); +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + case NFT_META_BRI_OIFNAME: + break; + default: + return nft_meta_get_init(ctx, expr, tb); + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + return 0; +} + +static struct nft_expr_type nft_meta_bridge_type; +static const struct nft_expr_ops nft_meta_bridge_get_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_bridge_get_eval, + .init = nft_meta_bridge_get_init, + .dump = nft_meta_get_dump, +}; + +static const struct nft_expr_ops nft_meta_bridge_set_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_set_eval, + .init = nft_meta_set_init, + .dump = nft_meta_set_dump, +}; + +static const struct nft_expr_ops * +nft_meta_bridge_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_bridge_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_bridge_set_ops; + + return ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_meta_bridge_type __read_mostly = { + .family = NFPROTO_BRIDGE, + .name = "meta", + .select_ops = &nft_meta_bridge_select_ops, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_meta_bridge_module_init(void) +{ + return nft_register_expr(&nft_meta_bridge_type); +} + +static void __exit nft_meta_bridge_module_exit(void) +{ + nft_unregister_expr(&nft_meta_bridge_type); +} + +module_init(nft_meta_bridge_module_init); +module_exit(nft_meta_bridge_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta"); diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c new file mode 100644 index 000000000000..ee3ffe93e14e --- /dev/null +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2014 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_reject.h> + +static void nft_reject_bridge_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + switch (eth_hdr(pkt->skb)->h_proto) { + case htons(ETH_P_IP): + return nft_reject_ipv4_eval(expr, data, pkt); + case htons(ETH_P_IPV6): + return nft_reject_ipv6_eval(expr, data, pkt); + default: + /* No explicit way to reject this protocol, drop it. */ + data[NFT_REG_VERDICT].verdict = NF_DROP; + break; + } +} + +static struct nft_expr_type nft_reject_bridge_type; +static const struct nft_expr_ops nft_reject_bridge_ops = { + .type = &nft_reject_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_bridge_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_bridge_type __read_mostly = { + .family = NFPROTO_BRIDGE, + .name = "reject", + .ops = &nft_reject_bridge_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_bridge_module_init(void) +{ + return nft_register_expr(&nft_reject_bridge_type); +} + +static void __exit nft_reject_bridge_module_exit(void) +{ + nft_unregister_expr(&nft_reject_bridge_type); +} + +module_init(nft_reject_bridge_module_init); +module_exit(nft_reject_bridge_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "reject"); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index e8437094d15f..43f750e88e19 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -908,8 +908,7 @@ static int caif_release(struct socket *sock) sock->sk = NULL; WARN_ON(IS_ERR(cf_sk->debugfs_socket_dir)); - if (cf_sk->debugfs_socket_dir != NULL) - debugfs_remove_recursive(cf_sk->debugfs_socket_dir); + debugfs_remove_recursive(cf_sk->debugfs_socket_dir); lock_sock(&(cf_sk->sk)); sk->sk_state = CAIF_DISCONNECTED; diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 0f455227da83..f5afda1abc76 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -547,7 +547,6 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) default: pr_err("Unrecognized Control Frame\n"); goto error; - break; } ret = 0; error: diff --git a/net/can/af_can.c b/net/can/af_can.c index a27f8aad9e99..ce82337521f6 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -338,6 +338,29 @@ static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) } /** + * effhash - hash function for 29 bit CAN identifier reduction + * @can_id: 29 bit CAN identifier + * + * Description: + * To reduce the linear traversal in one linked list of _single_ EFF CAN + * frame subscriptions the 29 bit identifier is mapped to 10 bits. + * (see CAN_EFF_RCV_HASH_BITS definition) + * + * Return: + * Hash value from 0x000 - 0x3FF ( enforced by CAN_EFF_RCV_HASH_BITS mask ) + */ +static unsigned int effhash(canid_t can_id) +{ + unsigned int hash; + + hash = can_id; + hash ^= can_id >> CAN_EFF_RCV_HASH_BITS; + hash ^= can_id >> (2 * CAN_EFF_RCV_HASH_BITS); + + return hash & ((1 << CAN_EFF_RCV_HASH_BITS) - 1); +} + +/** * find_rcv_list - determine optimal filterlist inside device filter struct * @can_id: pointer to CAN identifier of a given can_filter * @mask: pointer to CAN mask of a given can_filter @@ -400,10 +423,8 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, !(*can_id & CAN_RTR_FLAG)) { if (*can_id & CAN_EFF_FLAG) { - if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) { - /* RFC: a future use-case for hash-tables? */ - return &d->rx[RX_EFF]; - } + if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) + return &d->rx_eff[effhash(*can_id)]; } else { if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) return &d->rx_sff[*can_id]; @@ -632,7 +653,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) { + hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) { if (r->can_id == can_id) { deliver(skb, r); matches++; diff --git a/net/can/af_can.h b/net/can/af_can.h index 6de58b40535c..fca0fe9fc45a 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -59,12 +59,17 @@ struct receiver { char *ident; }; -enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX }; +#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) +#define CAN_EFF_RCV_HASH_BITS 10 +#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS) + +enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX }; /* per device receive filters linked at dev->ml_priv */ struct dev_rcv_lists { struct hlist_head rx[RX_MAX]; - struct hlist_head rx_sff[0x800]; + struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ]; + struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ]; int remove_on_zero_entries; int entries; }; diff --git a/net/can/gw.c b/net/can/gw.c index ac31891967da..050a2110d43f 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -804,7 +804,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) u8 limhops = 0; int err = 0; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) @@ -893,7 +893,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) u8 limhops = 0; int err = 0; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) diff --git a/net/can/proc.c b/net/can/proc.c index b543470c8f8b..1a19b985a868 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -80,7 +80,6 @@ static const char rx_list_name[][8] = { [RX_ALL] = "rx_all", [RX_FIL] = "rx_fil", [RX_INV] = "rx_inv", - [RX_EFF] = "rx_eff", }; /* @@ -389,25 +388,26 @@ static const struct file_operations can_rcvlist_proc_fops = { .release = single_release, }; -static inline void can_rcvlist_sff_proc_show_one(struct seq_file *m, - struct net_device *dev, - struct dev_rcv_lists *d) +static inline void can_rcvlist_proc_show_array(struct seq_file *m, + struct net_device *dev, + struct hlist_head *rcv_array, + unsigned int rcv_array_sz) { - int i; + unsigned int i; int all_empty = 1; /* check whether at least one list is non-empty */ - for (i = 0; i < 0x800; i++) - if (!hlist_empty(&d->rx_sff[i])) { + for (i = 0; i < rcv_array_sz; i++) + if (!hlist_empty(&rcv_array[i])) { all_empty = 0; break; } if (!all_empty) { can_print_recv_banner(m); - for (i = 0; i < 0x800; i++) { - if (!hlist_empty(&d->rx_sff[i])) - can_print_rcvlist(m, &d->rx_sff[i], dev); + for (i = 0; i < rcv_array_sz; i++) { + if (!hlist_empty(&rcv_array[i])) + can_print_rcvlist(m, &rcv_array[i], dev); } } else seq_printf(m, " (%s: no entry)\n", DNAME(dev)); @@ -425,12 +425,15 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) /* sff receive list for 'all' CAN devices (dev == NULL) */ d = &can_rx_alldev_list; - can_rcvlist_sff_proc_show_one(m, NULL, d); + can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff)); /* sff receive list for registered CAN devices */ for_each_netdev_rcu(&init_net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) - can_rcvlist_sff_proc_show_one(m, dev, dev->ml_priv); + if (dev->type == ARPHRD_CAN && dev->ml_priv) { + d = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, d->rx_sff, + ARRAY_SIZE(d->rx_sff)); + } } rcu_read_unlock(); @@ -452,6 +455,49 @@ static const struct file_operations can_rcvlist_sff_proc_fops = { .release = single_release, }; + +static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) +{ + struct net_device *dev; + struct dev_rcv_lists *d; + + /* RX_EFF */ + seq_puts(m, "\nreceive list 'rx_eff':\n"); + + rcu_read_lock(); + + /* eff receive list for 'all' CAN devices (dev == NULL) */ + d = &can_rx_alldev_list; + can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff)); + + /* eff receive list for registered CAN devices */ + for_each_netdev_rcu(&init_net, dev) { + if (dev->type == ARPHRD_CAN && dev->ml_priv) { + d = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, d->rx_eff, + ARRAY_SIZE(d->rx_eff)); + } + } + + rcu_read_unlock(); + + seq_putc(m, '\n'); + return 0; +} + +static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, can_rcvlist_eff_proc_show, NULL); +} + +static const struct file_operations can_rcvlist_eff_proc_fops = { + .owner = THIS_MODULE, + .open = can_rcvlist_eff_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * proc utility functions */ @@ -491,8 +537,8 @@ void can_init_proc(void) &can_rcvlist_proc_fops, (void *)RX_FIL); pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir, &can_rcvlist_proc_fops, (void *)RX_INV); - pde_rcvlist_eff = proc_create_data(CAN_PROC_RCVLIST_EFF, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_EFF); + pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir, + &can_rcvlist_eff_proc_fops); pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir, &can_rcvlist_sff_proc_fops); } diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 67d7721d237e..1675021d8c12 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -72,6 +72,8 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; case CEPH_MSG_STATFS: return "statfs"; case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; + case CEPH_MSG_MON_GET_VERSION: return "mon_get_version"; + case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply"; case CEPH_MSG_MDS_MAP: return "mds_map"; case CEPH_MSG_CLIENT_SESSION: return "client_session"; case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 6e7a236525b6..ffeba8f9dda9 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -8,6 +8,7 @@ #include <linux/key-type.h> #include <keys/ceph-type.h> +#include <keys/user-type.h> #include <linux/ceph/decode.h> #include "crypto.h" @@ -423,8 +424,7 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -static int ceph_key_instantiate(struct key *key, - struct key_preparsed_payload *prep) +static int ceph_key_preparse(struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; size_t datalen = prep->datalen; @@ -435,10 +435,6 @@ static int ceph_key_instantiate(struct key *key, if (datalen <= 0 || datalen > 32767 || !prep->data) goto err; - ret = key_payload_reserve(key, datalen); - if (ret < 0) - goto err; - ret = -ENOMEM; ckey = kmalloc(sizeof(*ckey), GFP_KERNEL); if (!ckey) @@ -450,7 +446,8 @@ static int ceph_key_instantiate(struct key *key, if (ret < 0) goto err_ckey; - key->payload.data = ckey; + prep->payload[0] = ckey; + prep->quotalen = datalen; return 0; err_ckey: @@ -459,12 +456,15 @@ err: return ret; } -static int ceph_key_match(const struct key *key, const void *description) +static void ceph_key_free_preparse(struct key_preparsed_payload *prep) { - return strcmp(key->description, description) == 0; + struct ceph_crypto_key *ckey = prep->payload[0]; + ceph_crypto_key_destroy(ckey); + kfree(ckey); } -static void ceph_key_destroy(struct key *key) { +static void ceph_key_destroy(struct key *key) +{ struct ceph_crypto_key *ckey = key->payload.data; ceph_crypto_key_destroy(ckey); @@ -473,8 +473,10 @@ static void ceph_key_destroy(struct key *key) { struct key_type key_type_ceph = { .name = "ceph", - .instantiate = ceph_key_instantiate, - .match = ceph_key_match, + .preparse = ceph_key_preparse, + .free_preparse = ceph_key_free_preparse, + .instantiate = generic_key_instantiate, + .match = user_match, .destroy = ceph_key_destroy, }; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 10421a4b76f8..d1a62c69a9f4 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -126,9 +126,13 @@ static int monc_show(struct seq_file *s, void *p) req = rb_entry(rp, struct ceph_mon_generic_request, node); op = le16_to_cpu(req->request->hdr.type); if (op == CEPH_MSG_STATFS) - seq_printf(s, "%lld statfs\n", req->tid); + seq_printf(s, "%llu statfs\n", req->tid); + else if (op == CEPH_MSG_POOLOP) + seq_printf(s, "%llu poolop\n", req->tid); + else if (op == CEPH_MSG_MON_GET_VERSION) + seq_printf(s, "%llu mon_get_version", req->tid); else - seq_printf(s, "%lld unknown\n", req->tid); + seq_printf(s, "%llu unknown\n", req->tid); } mutex_unlock(&monc->mutex); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index dac7f9b98687..1948d592aa54 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -557,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, return r; } -static int ceph_tcp_sendpage(struct socket *sock, struct page *page, +static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, bool more) { int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); @@ -570,6 +570,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, return ret; } +static int ceph_tcp_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, bool more) +{ + int ret; + struct kvec iov; + + /* sendpage cannot properly handle pages with page_count == 0, + * we need to fallback to sendmsg if that's the case */ + if (page_count(page) >= 1) + return __ceph_tcp_sendpage(sock, page, offset, size, more); + + iov.iov_base = kmap(page) + offset; + iov.iov_len = size; + ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more); + kunmap(page); + + return ret; +} /* * Shutdown/close the socket for the given connection. diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2ac9ef35110b..067d3af2eaf6 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -296,6 +296,33 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) __send_subscribe(monc); mutex_unlock(&monc->mutex); } +EXPORT_SYMBOL(ceph_monc_request_next_osdmap); + +int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, + unsigned long timeout) +{ + unsigned long started = jiffies; + int ret; + + mutex_lock(&monc->mutex); + while (monc->have_osdmap < epoch) { + mutex_unlock(&monc->mutex); + + if (timeout != 0 && time_after_eq(jiffies, started + timeout)) + return -ETIMEDOUT; + + ret = wait_event_interruptible_timeout(monc->client->auth_wq, + monc->have_osdmap >= epoch, timeout); + if (ret < 0) + return ret; + + mutex_lock(&monc->mutex); + } + + mutex_unlock(&monc->mutex); + return 0; +} +EXPORT_SYMBOL(ceph_monc_wait_osdmap); /* * @@ -477,14 +504,13 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, return m; } -static int do_generic_request(struct ceph_mon_client *monc, - struct ceph_mon_generic_request *req) +static int __do_generic_request(struct ceph_mon_client *monc, u64 tid, + struct ceph_mon_generic_request *req) { int err; /* register request */ - mutex_lock(&monc->mutex); - req->tid = ++monc->last_tid; + req->tid = tid != 0 ? tid : ++monc->last_tid; req->request->hdr.tid = cpu_to_le64(req->tid); __insert_generic_request(monc, req); monc->num_generic_requests++; @@ -496,13 +522,24 @@ static int do_generic_request(struct ceph_mon_client *monc, mutex_lock(&monc->mutex); rb_erase(&req->node, &monc->generic_request_tree); monc->num_generic_requests--; - mutex_unlock(&monc->mutex); if (!err) err = req->result; return err; } +static int do_generic_request(struct ceph_mon_client *monc, + struct ceph_mon_generic_request *req) +{ + int err; + + mutex_lock(&monc->mutex); + err = __do_generic_request(monc, 0, req); + mutex_unlock(&monc->mutex); + + return err; +} + /* * statfs */ @@ -579,6 +616,96 @@ out: } EXPORT_SYMBOL(ceph_monc_do_statfs); +static void handle_get_version_reply(struct ceph_mon_client *monc, + struct ceph_msg *msg) +{ + struct ceph_mon_generic_request *req; + u64 tid = le64_to_cpu(msg->hdr.tid); + void *p = msg->front.iov_base; + void *end = p + msg->front_alloc_len; + u64 handle; + + dout("%s %p tid %llu\n", __func__, msg, tid); + + ceph_decode_need(&p, end, 2*sizeof(u64), bad); + handle = ceph_decode_64(&p); + if (tid != 0 && tid != handle) + goto bad; + + mutex_lock(&monc->mutex); + req = __lookup_generic_req(monc, handle); + if (req) { + *(u64 *)req->buf = ceph_decode_64(&p); + req->result = 0; + get_generic_request(req); + } + mutex_unlock(&monc->mutex); + if (req) { + complete_all(&req->completion); + put_generic_request(req); + } + + return; +bad: + pr_err("corrupt mon_get_version reply\n"); + ceph_msg_dump(msg); +} + +/* + * Send MMonGetVersion and wait for the reply. + * + * @what: one of "mdsmap", "osdmap" or "monmap" + */ +int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what, + u64 *newest) +{ + struct ceph_mon_generic_request *req; + void *p, *end; + u64 tid; + int err; + + req = kzalloc(sizeof(*req), GFP_NOFS); + if (!req) + return -ENOMEM; + + kref_init(&req->kref); + req->buf = newest; + req->buf_len = sizeof(*newest); + init_completion(&req->completion); + + req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION, + sizeof(u64) + sizeof(u32) + strlen(what), + GFP_NOFS, true); + if (!req->request) { + err = -ENOMEM; + goto out; + } + + req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024, + GFP_NOFS, true); + if (!req->reply) { + err = -ENOMEM; + goto out; + } + + p = req->request->front.iov_base; + end = p + req->request->front_alloc_len; + + /* fill out request */ + mutex_lock(&monc->mutex); + tid = ++monc->last_tid; + ceph_encode_64(&p, tid); /* handle */ + ceph_encode_string(&p, end, what, strlen(what)); + + err = __do_generic_request(monc, tid, req); + + mutex_unlock(&monc->mutex); +out: + kref_put(&req->kref, release_generic_request); + return err; +} +EXPORT_SYMBOL(ceph_monc_do_get_version); + /* * pool ops */ @@ -981,6 +1108,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) handle_statfs_reply(monc, msg); break; + case CEPH_MSG_MON_GET_VERSION_REPLY: + handle_get_version_reply(monc, msg); + break; + case CEPH_MSG_POOLOP_REPLY: handle_poolop_reply(monc, msg); break; @@ -1029,6 +1160,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_AUTH_REPLY: m = ceph_msg_get(monc->m_auth_reply); break; + case CEPH_MSG_MON_GET_VERSION_REPLY: + if (le64_to_cpu(hdr->tid) != 0) + return get_generic_reply(con, hdr, skip); + + /* + * Older OSDs don't set reply tid even if the orignal + * request had a non-zero tid. Workaround this weirdness + * by falling through to the allocate case. + */ case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b0dfce77656a..05be0c181695 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2491,7 +2491,7 @@ EXPORT_SYMBOL(ceph_osdc_sync); * Call all pending notify callbacks - for use after a watch is * unregistered, to make sure no more callbacks for it will be invoked */ -extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) +void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) { flush_workqueue(osdc->notify_wq); } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index e632b5a52f5b..c547e46084d3 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -329,6 +329,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable chooseleaf_descend_once = %d", c->chooseleaf_descend_once); + ceph_decode_need(p, end, sizeof(u8), done); + c->chooseleaf_vary_r = ceph_decode_8(p); + dout("crush decode tunable chooseleaf_vary_r = %d", + c->chooseleaf_vary_r); + done: dout("crush_decode success\n"); return c; @@ -1548,8 +1553,10 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps, return; for (i = 0; i < len; i++) { - if (osds[i] != CRUSH_ITEM_NONE && - osdmap->osd_primary_affinity[i] != + int osd = osds[i]; + + if (osd != CRUSH_ITEM_NONE && + osdmap->osd_primary_affinity[osd] != CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) { break; } @@ -1563,10 +1570,9 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps, * osd's pgs get rejected as primary. */ for (i = 0; i < len; i++) { - int osd; + int osd = osds[i]; u32 aff; - osd = osds[i]; if (osd == CRUSH_ITEM_NONE) continue; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 815a2249cfa9..555013034f7a 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -53,7 +53,10 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty) set_page_dirty_lock(pages[i]); put_page(pages[i]); } - kfree(pages); + if (is_vmalloc_addr(pages)) + vfree(pages); + else + kfree(pages); } EXPORT_SYMBOL(ceph_put_page_vector); @@ -165,36 +168,6 @@ void ceph_copy_from_page_vector(struct page **pages, EXPORT_SYMBOL(ceph_copy_from_page_vector); /* - * copy user data from a page vector into a user pointer - */ -int ceph_copy_page_vector_to_user(struct page **pages, - void __user *data, - loff_t off, size_t len) -{ - int i = 0; - int po = off & ~PAGE_CACHE_MASK; - int left = len; - int l, bad; - - while (left > 0) { - l = min_t(int, left, PAGE_CACHE_SIZE-po); - bad = copy_to_user(data, page_address(pages[i]) + po, l); - if (bad == l) - return -EFAULT; - data += l - bad; - left -= l - bad; - if (po) { - po += l - bad; - if (po == PAGE_CACHE_SIZE) - po = 0; - } - i++; - } - return len; -} -EXPORT_SYMBOL(ceph_copy_page_vector_to_user); - -/* * Zero an extent within a page vector. Offset is relative to the * start of the first page. */ diff --git a/net/compat.c b/net/compat.c index 9a76eaf63184..bc8aeefddf3f 100644 --- a/net/compat.c +++ b/net/compat.c @@ -85,7 +85,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, { int tot_len; - if (kern_msg->msg_namelen) { + if (kern_msg->msg_name && kern_msg->msg_namelen) { if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, @@ -93,10 +93,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - if (kern_msg->msg_name) - kern_msg->msg_name = kern_address; - } else + kern_msg->msg_name = kern_address; + } else { kern_msg->msg_name = NULL; + kern_msg->msg_namelen = 0; + } tot_len = iov_from_user_compat_to_kern(kern_iov, (struct compat_iovec __user *)kern_msg->msg_iov, diff --git a/net/core/Makefile b/net/core/Makefile index 826b925aa453..71093d94ad2b 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o + sock_diag.o dev_ioctl.o tso.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o diff --git a/net/core/datagram.c b/net/core/datagram.c index a16ed7bbe376..488dd1a825c0 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -740,17 +740,37 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); if (likely(!sum)) { - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; } + skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); __sum16 __skb_checksum_complete(struct sk_buff *skb) { - return __skb_checksum_complete_head(skb, skb->len); + __wsum csum; + __sum16 sum; + + csum = skb_checksum(skb, 0, skb->len, 0); + + /* skb->csum holds pseudo checksum */ + sum = csum_fold(csum_add(skb->csum, csum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + + /* Save full packet checksum */ + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + skb->csum_valid = !sum; + + return sum; } EXPORT_SYMBOL(__skb_checksum_complete); diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a98..1c15b189c52b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,7 @@ #include <linux/hashtable.h> #include <linux/vmalloc.h> #include <linux/if_macvlan.h> +#include <linux/errqueue.h> #include "net-sysfs.h" @@ -148,6 +149,9 @@ struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; static int netif_rx_internal(struct sk_buff *skb); +static int call_netdevice_notifiers_info(unsigned long val, + struct net_device *dev, + struct netdev_notifier_info *info); /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -1082,6 +1086,7 @@ static int dev_get_valid_name(struct net *net, */ int dev_change_name(struct net_device *dev, const char *newname) { + unsigned char old_assign_type; char oldname[IFNAMSIZ]; int err = 0; int ret; @@ -1109,10 +1114,17 @@ int dev_change_name(struct net_device *dev, const char *newname) return err; } + if (oldname[0] && !strchr(oldname, '%')) + netdev_info(dev, "renamed from %s\n", oldname); + + old_assign_type = dev->name_assign_type; + dev->name_assign_type = NET_NAME_RENAMED; + rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); + dev->name_assign_type = old_assign_type; write_seqcount_end(&devnet_rename_seq); return ret; } @@ -1141,6 +1153,8 @@ rollback: write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); memcpy(oldname, newname, IFNAMSIZ); + dev->name_assign_type = old_assign_type; + old_assign_type = NET_NAME_RENAMED; goto rollback; } else { pr_err("%s: name change rollback failed: %d\n", @@ -1207,7 +1221,11 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - call_netdevice_notifiers(NETDEV_CHANGE, dev); + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = 0; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } } @@ -1326,7 +1344,7 @@ static int __dev_close_many(struct list_head *head) * dev->stop() will invoke napi_disable() on all of it's * napi_struct instances on this device. */ - smp_mb__after_clear_bit(); /* Commit netif_running(). */ + smp_mb__after_atomic(); /* Commit netif_running(). */ } dev_deactivate_many(head); @@ -1661,6 +1679,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(is_skb_forwardable); +int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + + if (unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->protocol = eth_type_trans(skb, dev); + + return 0; +} +EXPORT_SYMBOL_GPL(__dev_forward_skb); + /** * dev_forward_skb - loopback an skb to another netif * @@ -1681,24 +1722,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { - if (skb_copy_ubufs(skb, GFP_ATOMIC)) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - } - - if (unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; - } - - skb_scrub_packet(skb, true); - skb->protocol = eth_type_trans(skb, dev); - - return netif_rx_internal(skb); + return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -2283,8 +2307,8 @@ EXPORT_SYMBOL(skb_checksum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { + unsigned int vlan_depth = skb->mac_len; __be16 type = skb->protocol; - int vlan_depth = skb->mac_len; /* Tunnel gso handlers can set protocol to ethernet. */ if (type == htons(ETH_P_TEB)) { @@ -2297,15 +2321,30 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) type = eth->h_proto; } - while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { - struct vlan_hdr *vh; + /* if skb->protocol is 802.1Q/AD then the header should already be + * present at mac_len - VLAN_HLEN (if mac_len > 0), or at + * ETH_HLEN otherwise + */ + if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (vlan_depth) { + if (WARN_ON(vlan_depth < VLAN_HLEN)) + return 0; + vlan_depth -= VLAN_HLEN; + } else { + vlan_depth = ETH_HLEN; + } + do { + struct vlan_hdr *vh; - if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) - return 0; + if (unlikely(!pskb_may_pull(skb, + vlan_depth + VLAN_HLEN))) + return 0; - vh = (struct vlan_hdr *)(skb->data + vlan_depth); - type = vh->h_vlan_encapsulated_proto; - vlan_depth += VLAN_HLEN; + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; + } while (type == htons(ETH_P_8021Q) || + type == htons(ETH_P_8021AD)); } *depth = vlan_depth; @@ -2386,8 +2425,8 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, skb_warn_bad_offload(skb); - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + err = skb_cow_head(skb, 0); + if (err < 0) return ERR_PTR(err); } @@ -2418,7 +2457,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2492,39 +2531,63 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } +/* If MPLS offload request, verify we are testing hardware MPLS features + * instead of standard features for the netdev. + */ +#ifdef CONFIG_NET_MPLS_GSO +static netdev_features_t net_mpls_features(struct sk_buff *skb, + netdev_features_t features, + __be16 type) +{ + if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) + features &= skb->dev->mpls_features; + + return features; +} +#else +static netdev_features_t net_mpls_features(struct sk_buff *skb, + netdev_features_t features, + __be16 type) +{ + return features; +} +#endif + static netdev_features_t harmonize_features(struct sk_buff *skb, - const struct net_device *dev, - netdev_features_t features) + netdev_features_t features) { int tmp; + __be16 type; + + type = skb_network_protocol(skb, &tmp); + features = net_mpls_features(skb, features, type); if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { + !can_checksum_protocol(features, type)) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(dev, skb)) { + } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev) +netdev_features_t netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - netdev_features_t features = dev->features; + netdev_features_t features = skb->dev->features; - if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } - features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) @@ -2532,9 +2595,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } -EXPORT_SYMBOL(netif_skb_dev_features); +EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) @@ -2693,8 +2756,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. - * This permits __QDISC_STATE_RUNNING owner to get the lock more often - * and dequeue packets faster. + * This permits __QDISC___STATE_RUNNING owner to get the lock more + * often and dequeue packets faster. */ contended = qdisc_is_running(q); if (unlikely(contended)) @@ -2814,6 +2877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb_reset_mac_header(skb); + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) + __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ @@ -3343,7 +3409,7 @@ static void net_tx_action(struct softirq_action *h) root_lock = qdisc_lock(q); if (spin_trylock(root_lock)) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(__QDISC_STATE_SCHED, &q->state); qdisc_run(q); @@ -3353,7 +3419,7 @@ static void net_tx_action(struct softirq_action *h) &q->state)) { __netif_reschedule(q); } else { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(__QDISC_STATE_SCHED, &q->state); } @@ -3953,6 +4019,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff } NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; + NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; @@ -4043,6 +4110,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + skb->encapsulation = 0; + skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); napi->skb = skb; @@ -4181,9 +4250,8 @@ static int process_backlog(struct napi_struct *napi, int quota) #endif napi->weight = weight_p; local_irq_disable(); - while (work < quota) { + while (1) { struct sk_buff *skb; - unsigned int qlen; while ((skb = __skb_dequeue(&sd->process_queue))) { local_irq_enable(); @@ -4197,24 +4265,24 @@ static int process_backlog(struct napi_struct *napi, int quota) } rps_lock(sd); - qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen) - skb_queue_splice_tail_init(&sd->input_pkt_queue, - &sd->process_queue); - - if (qlen < quota - work) { + if (skb_queue_empty(&sd->input_pkt_queue)) { /* * Inline a custom version of __napi_complete(). * only current cpu owns and manipulates this napi, - * and NAPI_STATE_SCHED is the only possible flag set on backlog. - * we can use a plain write instead of clear_bit(), + * and NAPI_STATE_SCHED is the only possible flag set + * on backlog. + * We can use a plain write instead of clear_bit(), * and we dont need an smp_mb() memory barrier. */ list_del(&napi->poll_list); napi->state = 0; + rps_unlock(sd); - quota = work + qlen; + break; } + + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); rps_unlock(sd); } local_irq_enable(); @@ -4244,7 +4312,7 @@ void __napi_complete(struct napi_struct *n) BUG_ON(n->gro_list); list_del(&n->poll_list); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(NAPI_STATE_SCHED, &n->state); } EXPORT_SYMBOL(__napi_complete); @@ -4543,6 +4611,32 @@ void *netdev_adjacent_get_private(struct list_head *adj_list) EXPORT_SYMBOL(netdev_adjacent_get_private); /** + * netdev_upper_get_next_dev_rcu - Get the next dev from upper list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next device from the dev's upper list, starting from iter + * position. The caller must hold RCU read lock. + */ +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *upper; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + + upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->adj_list.upper) + return NULL; + + *iter = &upper->list; + + return upper->dev; +} +EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); + +/** * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device * @iter: list_head ** of the current position @@ -4624,6 +4718,32 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); /** + * netdev_lower_get_next - Get the next device from the lower neighbour + * list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's lower neighbour + * list, starting from iter position. The caller must hold RTNL lock or + * its own locking that guarantees that the neighbour lower + * list will remain unchainged. + */ +void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + + return lower->dev; +} +EXPORT_SYMBOL(netdev_lower_get_next); + +/** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU * variant @@ -5073,6 +5193,30 @@ void *netdev_lower_dev_get_private(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_dev_get_private); + +int dev_get_nest_level(struct net_device *dev, + bool (*type_check)(struct net_device *dev)) +{ + struct net_device *lower = NULL; + struct list_head *iter; + int max_nest = -1; + int nest; + + ASSERT_RTNL(); + + netdev_for_each_lower_dev(dev, lower, iter) { + nest = dev_get_nest_level(lower, type_check); + if (max_nest < nest) + max_nest = nest; + } + + if (type_check(dev)) + max_nest++; + + return max_nest; +} +EXPORT_SYMBOL(dev_get_nest_level); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; @@ -5238,7 +5382,6 @@ void __dev_set_rx_mode(struct net_device *dev) if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); } -EXPORT_SYMBOL(__dev_set_rx_mode); void dev_set_rx_mode(struct net_device *dev) { @@ -5311,13 +5454,9 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) */ ret = 0; - if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ + if ((old_flags ^ flags) & IFF_UP) ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); - if (!ret) - dev_set_rx_mode(dev); - } - if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; unsigned int old_flags = dev->flags; @@ -5543,7 +5682,7 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); -static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); +DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { @@ -5600,10 +5739,6 @@ static void rollback_registered_many(struct list_head *head) */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - if (!dev->rtnl_link_ops || - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); - /* * Flush the unicast and multicast chains */ @@ -5613,6 +5748,10 @@ static void rollback_registered_many(struct list_head *head) if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); @@ -5838,10 +5977,7 @@ static void netdev_init_one_queue(struct net_device *dev, static void netif_free_tx_queues(struct net_device *dev) { - if (is_vmalloc_addr(dev->_tx)) - vfree(dev->_tx); - else - kfree(dev->_tx); + kvfree(dev->_tx); } static int netif_alloc_netdev_queues(struct net_device *dev) @@ -6315,25 +6451,24 @@ void netdev_freemem(struct net_device *dev) { char *addr = (char *)dev - dev->padded; - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); + kvfree(addr); } /** * alloc_netdev_mqs - allocate network device - * @sizeof_priv: size of private data to allocate space for - * @name: device name format string - * @setup: callback to initialize device - * @txqs: the number of TX subqueues to allocate - * @rxqs: the number of RX subqueues to allocate + * @sizeof_priv: size of private data to allocate space for + * @name: device name format string + * @name_assign_type: origin of device name + * @setup: callback to initialize device + * @txqs: the number of TX subqueues to allocate + * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use * and performs basic initialization. Also allocates subqueue structs * for each queue on the device. */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs) { @@ -6412,6 +6547,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #endif strcpy(dev->name, name); + dev->name_assign_type = name_assign_type; dev->group = INIT_NETDEV_GROUP; if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; @@ -6423,11 +6559,6 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); - netif_free_tx_queues(dev); -#ifdef CONFIG_SYSFS - kfree(dev->_rx); -#endif - free_dev: netdev_freemem(dev); return NULL; @@ -6524,6 +6655,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); /** * unregister_netdevice_many - unregister many devices * @head: list of devices + * + * Note: As most callers use a stack allocated list_head, + * we force a list_del() to make sure stack wont be corrupted later. */ void unregister_netdevice_many(struct list_head *head) { @@ -6533,6 +6667,7 @@ void unregister_netdevice_many(struct list_head *head) rollback_registered_many(head); list_for_each_entry(dev, head, unreg_list) net_set_todo(dev); + list_del(head); } } EXPORT_SYMBOL(unregister_netdevice_many); @@ -6824,12 +6959,14 @@ static int __netdev_printk(const char *level, const struct net_device *dev, if (dev && dev->dev.parent) { r = dev_printk_emit(level[1] - '0', dev->dev.parent, - "%s %s %s: %pV", + "%s %s %s%s: %pV", dev_driver_string(dev->dev.parent), dev_name(dev->dev.parent), - netdev_name(dev), vaf); + netdev_name(dev), netdev_reg_state(dev), + vaf); } else if (dev) { - r = printk("%s%s: %pV", level, netdev_name(dev), vaf); + r = printk("%s%s%s: %pV", level, netdev_name(dev), + netdev_reg_state(dev), vaf); } else { r = printk("%s(NULL net_device): %pV", level, vaf); } @@ -6981,14 +7118,13 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { - if (dev->rtnl_link_ops) + if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) dev->rtnl_link_ops->dellink(dev, &dev_kill_list); else unregister_netdevice_queue(dev, &dev_kill_list); } } unregister_netdevice_many(&dev_kill_list); - list_del(&dev_kill_list); rtnl_unlock(); } diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 329d5794e7dc..b6b230600b97 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_unsync); +/** + * __hw_addr_sync_dev - Synchonize device's multicast list + * @list: address list to syncronize + * @dev: device to sync + * @sync: function to call if address should be added + * @unsync: function to call if address should be removed + * + * This funciton is intended to be called from the ndo_set_rx_mode + * function of devices that require explicit address add/remove + * notifications. The unsync function may be NULL in which case + * the addresses requiring removal will simply be removed without + * any notification to the device. + **/ +int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*sync)(struct net_device *, const unsigned char *), + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + struct netdev_hw_addr *ha, *tmp; + int err; + + /* first go through and flush out any stale entries */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt || ha->refcount != 1) + continue; + + /* if unsync is defined and fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr)) + continue; + + ha->sync_cnt--; + __hw_addr_del_entry(list, ha, false, false); + } + + /* go through and sync new entries to the list */ + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (ha->sync_cnt) + continue; + + err = sync(dev, ha->addr); + if (err) + return err; + + ha->sync_cnt++; + ha->refcount++; + } + + return 0; +} +EXPORT_SYMBOL(__hw_addr_sync_dev); + +/** + * __hw_addr_unsync_dev - Remove synchonized addresses from device + * @list: address list to remove syncronized addresses from + * @dev: device to sync + * @unsync: function to call if address should be removed + * + * Remove all addresses that were added to the device by __hw_addr_sync_dev(). + * This function is intended to be called from the ndo_stop or ndo_open + * functions on devices that require explicit address add/remove + * notifications. If the unsync function pointer is NULL then this function + * can be used to just reset the sync_cnt for the addresses in the list. + **/ +void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, + struct net_device *dev, + int (*unsync)(struct net_device *, + const unsigned char *)) +{ + struct netdev_hw_addr *ha, *tmp; + + list_for_each_entry_safe(ha, tmp, &list->list, list) { + if (!ha->sync_cnt) + continue; + + /* if unsync is defined and fails defer unsyncing address */ + if (unsync && unsync(dev, ha->addr)) + continue; + + ha->sync_cnt--; + __hw_addr_del_entry(list, ha, false, false); + } +} +EXPORT_SYMBOL(__hw_addr_unsync_dev); + static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index e70301eb7a4a..50f9a9db5792 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -289,10 +289,8 @@ static int net_dm_cmd_trace(struct sk_buff *skb, switch (info->genlhdr->cmd) { case NET_DM_CMD_START: return set_all_monitor_traces(TRACE_ON); - break; case NET_DM_CMD_STOP: return set_all_monitor_traces(TRACE_OFF); - break; } return -ENOTSUPP; diff --git a/net/core/dst.c b/net/core/dst.c index 80d6286c8b62..a028409ee438 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -269,6 +269,15 @@ again: } EXPORT_SYMBOL(dst_destroy); +static void dst_destroy_rcu(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); +} + void dst_release(struct dst_entry *dst) { if (dst) { @@ -276,11 +285,8 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); - } + if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + call_rcu(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 640ba0e5831c..17cb912793fa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -557,6 +557,23 @@ err_out: return ret; } +static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, + struct ethtool_rxnfc *rx_rings, + u32 size) +{ + int i; + + if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) + return -EFAULT; + + /* Validate ring indices */ + for (i = 0; i < size; i++) + if (indir[i] >= rx_rings->data) + return -EINVAL; + + return 0; +} + static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { @@ -565,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, int ret; if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh_indir) + !dev->ethtool_ops->get_rxfh) return -EOPNOTSUPP; dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); if (dev_size == 0) @@ -591,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, if (!indir) return -ENOMEM; - ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL); if (ret) goto out; @@ -613,8 +630,9 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, u32 *indir; const struct ethtool_ops *ops = dev->ethtool_ops; int ret; + u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); - if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || + if (!ops->get_rxfh_indir_size || !ops->set_rxfh || !ops->get_rxnfc) return -EOPNOTSUPP; @@ -643,28 +661,184 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, for (i = 0; i < dev_size; i++) indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); } else { - if (copy_from_user(indir, - useraddr + - offsetof(struct ethtool_rxfh_indir, - ring_index[0]), - dev_size * sizeof(indir[0]))) { + ret = ethtool_copy_validate_indir(indir, + useraddr + ringidx_offset, + &rx_rings, + dev_size); + if (ret) + goto out; + } + + ret = ops->set_rxfh(dev, indir, NULL); + +out: + kfree(indir); + return ret; +} + +static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + u32 user_indir_size, user_key_size; + u32 dev_indir_size = 0, dev_key_size = 0; + struct ethtool_rxfh rxfh; + u32 total_size; + u32 indir_bytes; + u32 *indir = NULL; + u8 *hkey = NULL; + u8 *rss_config; + + if (!(dev->ethtool_ops->get_rxfh_indir_size || + dev->ethtool_ops->get_rxfh_key_size) || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + if (ops->get_rxfh_key_size) + dev_key_size = ops->get_rxfh_key_size(dev); + + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) + return -EFAULT; + user_indir_size = rxfh.indir_size; + user_key_size = rxfh.key_size; + + /* Check that reserved fields are 0 for now */ + if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1]) + return -EINVAL; + + rxfh.indir_size = dev_indir_size; + rxfh.key_size = dev_key_size; + if (copy_to_user(useraddr, &rxfh, sizeof(rxfh))) + return -EFAULT; + + /* If the user buffer size is 0, this is just a query for the + * device table size and key size. Otherwise, if the User size is + * not equal to device table size or key size it's an error. + */ + if (!user_indir_size && !user_key_size) + return 0; + + if ((user_indir_size && (user_indir_size != dev_indir_size)) || + (user_key_size && (user_key_size != dev_key_size))) + return -EINVAL; + + indir_bytes = user_indir_size * sizeof(indir[0]); + total_size = indir_bytes + user_key_size; + rss_config = kzalloc(total_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + if (user_indir_size) + indir = (u32 *)rss_config; + + if (user_key_size) + hkey = rss_config + indir_bytes; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey); + if (!ret) { + if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, rss_config[0]), + rss_config, total_size)) ret = -EFAULT; + } + + kfree(rss_config); + + return ret; +} + +static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, + void __user *useraddr) +{ + int ret; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxnfc rx_rings; + struct ethtool_rxfh rxfh; + u32 dev_indir_size = 0, dev_key_size = 0, i; + u32 *indir = NULL, indir_bytes = 0; + u8 *hkey = NULL; + u8 *rss_config; + u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); + + if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) || + !ops->get_rxnfc || !ops->set_rxfh) + return -EOPNOTSUPP; + + if (ops->get_rxfh_indir_size) + dev_indir_size = ops->get_rxfh_indir_size(dev); + if (ops->get_rxfh_key_size) + dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev); + if ((dev_key_size + dev_indir_size) == 0) + return -EOPNOTSUPP; + + if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) + return -EFAULT; + + /* Check that reserved fields are 0 for now */ + if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1]) + return -EINVAL; + + /* If either indir or hash key is valid, proceed further. + * It is not valid to request that both be unchanged. + */ + if ((rxfh.indir_size && + rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && + rxfh.indir_size != dev_indir_size) || + (rxfh.key_size && (rxfh.key_size != dev_key_size)) || + (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && + rxfh.key_size == 0)) + return -EINVAL; + + if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) + indir_bytes = dev_indir_size * sizeof(indir[0]); + + rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); + if (!rss_config) + return -ENOMEM; + + rx_rings.cmd = ETHTOOL_GRXRINGS; + ret = ops->get_rxnfc(dev, &rx_rings, NULL); + if (ret) + goto out; + + /* rxfh.indir_size == 0 means reset the indir table to default. + * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged. + */ + if (rxfh.indir_size && + rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { + indir = (u32 *)rss_config; + ret = ethtool_copy_validate_indir(indir, + useraddr + rss_cfg_offset, + &rx_rings, + rxfh.indir_size); + if (ret) goto out; - } + } else if (rxfh.indir_size == 0) { + indir = (u32 *)rss_config; + for (i = 0; i < dev_indir_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } - /* Validate ring indices */ - for (i = 0; i < dev_size; i++) { - if (indir[i] >= rx_rings.data) { - ret = -EINVAL; - goto out; - } + if (rxfh.key_size) { + hkey = rss_config + indir_bytes; + if (copy_from_user(hkey, + useraddr + rss_cfg_offset + indir_bytes, + rxfh.key_size)) { + ret = -EFAULT; + goto out; } } - ret = ops->set_rxfh_indir(dev, indir); + ret = ops->set_rxfh(dev, indir, hkey); out: - kfree(indir); + kfree(rss_config); return ret; } @@ -1491,6 +1665,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: case ETHTOOL_GRXFHINDIR: + case ETHTOOL_GRSSH: case ETHTOOL_GFEATURES: case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: @@ -1628,6 +1803,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GRSSH: + rc = ethtool_get_rxfh(dev, useraddr); + break; + case ETHTOOL_SRSSH: + rc = ethtool_set_rxfh(dev, useraddr); + break; case ETHTOOL_GFEATURES: rc = ethtool_get_features(dev, useraddr); break; diff --git a/net/core/filter.c b/net/core/filter.c index cd58614660cf..d814b8a89d0f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -18,7 +18,7 @@ * 2 of the License, or (at your option) any later version. * * Andi Kleen - Fix a few bad bugs and races. - * Kris Katterjohn - Added many additional checks in sk_chk_filter() + * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ #include <linux/module.h> @@ -45,32 +45,6 @@ #include <linux/seccomp.h> #include <linux/if_vlan.h> -/* No hurry in this branch - * - * Exported for the bpf jit load helper. - */ -void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size) -{ - u8 *ptr = NULL; - - if (k >= SKF_NET_OFF) - ptr = skb_network_header(skb) + k - SKF_NET_OFF; - else if (k >= SKF_LL_OFF) - ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - - if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) - return ptr; - return NULL; -} - -static inline void *load_pointer(const struct sk_buff *skb, int k, - unsigned int size, void *buffer) -{ - if (k >= 0) - return skb_header_pointer(skb, k, size, buffer); - return bpf_internal_load_pointer_neg_helper(skb, k, size); -} - /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff @@ -113,463 +87,15 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sk_filter); -/* Base function for offset calculation. Needs to go into .text section, - * therefore keeping it non-static as well; will also be used by JITs - * anyway later on, so do not let the compiler omit it. - */ -noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) -{ - return 0; -} - -/** - * __sk_run_filter - run a filter on a given context - * @ctx: buffer to run the filter on - * @insn: filter to apply - * - * Decode and apply filter instructions to the skb->data. Return length to - * keep, 0 for none. @ctx is the data we are operating on, @insn is the - * array of filter instructions. - */ -unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) -{ - u64 stack[MAX_BPF_STACK / sizeof(u64)]; - u64 regs[MAX_BPF_REG], tmp; - void *ptr; - int off; - -#define K insn->imm -#define A regs[insn->a_reg] -#define X regs[insn->x_reg] -#define R0 regs[0] - -#define CONT ({insn++; goto select_insn; }) -#define CONT_JMP ({insn++; goto select_insn; }) - - static const void *jumptable[256] = { - [0 ... 255] = &&default_label, - /* Now overwrite non-defaults ... */ -#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C - DL(BPF_ALU, BPF_ADD, BPF_X), - DL(BPF_ALU, BPF_ADD, BPF_K), - DL(BPF_ALU, BPF_SUB, BPF_X), - DL(BPF_ALU, BPF_SUB, BPF_K), - DL(BPF_ALU, BPF_AND, BPF_X), - DL(BPF_ALU, BPF_AND, BPF_K), - DL(BPF_ALU, BPF_OR, BPF_X), - DL(BPF_ALU, BPF_OR, BPF_K), - DL(BPF_ALU, BPF_LSH, BPF_X), - DL(BPF_ALU, BPF_LSH, BPF_K), - DL(BPF_ALU, BPF_RSH, BPF_X), - DL(BPF_ALU, BPF_RSH, BPF_K), - DL(BPF_ALU, BPF_XOR, BPF_X), - DL(BPF_ALU, BPF_XOR, BPF_K), - DL(BPF_ALU, BPF_MUL, BPF_X), - DL(BPF_ALU, BPF_MUL, BPF_K), - DL(BPF_ALU, BPF_MOV, BPF_X), - DL(BPF_ALU, BPF_MOV, BPF_K), - DL(BPF_ALU, BPF_DIV, BPF_X), - DL(BPF_ALU, BPF_DIV, BPF_K), - DL(BPF_ALU, BPF_MOD, BPF_X), - DL(BPF_ALU, BPF_MOD, BPF_K), - DL(BPF_ALU, BPF_NEG, 0), - DL(BPF_ALU, BPF_END, BPF_TO_BE), - DL(BPF_ALU, BPF_END, BPF_TO_LE), - DL(BPF_ALU64, BPF_ADD, BPF_X), - DL(BPF_ALU64, BPF_ADD, BPF_K), - DL(BPF_ALU64, BPF_SUB, BPF_X), - DL(BPF_ALU64, BPF_SUB, BPF_K), - DL(BPF_ALU64, BPF_AND, BPF_X), - DL(BPF_ALU64, BPF_AND, BPF_K), - DL(BPF_ALU64, BPF_OR, BPF_X), - DL(BPF_ALU64, BPF_OR, BPF_K), - DL(BPF_ALU64, BPF_LSH, BPF_X), - DL(BPF_ALU64, BPF_LSH, BPF_K), - DL(BPF_ALU64, BPF_RSH, BPF_X), - DL(BPF_ALU64, BPF_RSH, BPF_K), - DL(BPF_ALU64, BPF_XOR, BPF_X), - DL(BPF_ALU64, BPF_XOR, BPF_K), - DL(BPF_ALU64, BPF_MUL, BPF_X), - DL(BPF_ALU64, BPF_MUL, BPF_K), - DL(BPF_ALU64, BPF_MOV, BPF_X), - DL(BPF_ALU64, BPF_MOV, BPF_K), - DL(BPF_ALU64, BPF_ARSH, BPF_X), - DL(BPF_ALU64, BPF_ARSH, BPF_K), - DL(BPF_ALU64, BPF_DIV, BPF_X), - DL(BPF_ALU64, BPF_DIV, BPF_K), - DL(BPF_ALU64, BPF_MOD, BPF_X), - DL(BPF_ALU64, BPF_MOD, BPF_K), - DL(BPF_ALU64, BPF_NEG, 0), - DL(BPF_JMP, BPF_CALL, 0), - DL(BPF_JMP, BPF_JA, 0), - DL(BPF_JMP, BPF_JEQ, BPF_X), - DL(BPF_JMP, BPF_JEQ, BPF_K), - DL(BPF_JMP, BPF_JNE, BPF_X), - DL(BPF_JMP, BPF_JNE, BPF_K), - DL(BPF_JMP, BPF_JGT, BPF_X), - DL(BPF_JMP, BPF_JGT, BPF_K), - DL(BPF_JMP, BPF_JGE, BPF_X), - DL(BPF_JMP, BPF_JGE, BPF_K), - DL(BPF_JMP, BPF_JSGT, BPF_X), - DL(BPF_JMP, BPF_JSGT, BPF_K), - DL(BPF_JMP, BPF_JSGE, BPF_X), - DL(BPF_JMP, BPF_JSGE, BPF_K), - DL(BPF_JMP, BPF_JSET, BPF_X), - DL(BPF_JMP, BPF_JSET, BPF_K), - DL(BPF_JMP, BPF_EXIT, 0), - DL(BPF_STX, BPF_MEM, BPF_B), - DL(BPF_STX, BPF_MEM, BPF_H), - DL(BPF_STX, BPF_MEM, BPF_W), - DL(BPF_STX, BPF_MEM, BPF_DW), - DL(BPF_STX, BPF_XADD, BPF_W), - DL(BPF_STX, BPF_XADD, BPF_DW), - DL(BPF_ST, BPF_MEM, BPF_B), - DL(BPF_ST, BPF_MEM, BPF_H), - DL(BPF_ST, BPF_MEM, BPF_W), - DL(BPF_ST, BPF_MEM, BPF_DW), - DL(BPF_LDX, BPF_MEM, BPF_B), - DL(BPF_LDX, BPF_MEM, BPF_H), - DL(BPF_LDX, BPF_MEM, BPF_W), - DL(BPF_LDX, BPF_MEM, BPF_DW), - DL(BPF_LD, BPF_ABS, BPF_W), - DL(BPF_LD, BPF_ABS, BPF_H), - DL(BPF_LD, BPF_ABS, BPF_B), - DL(BPF_LD, BPF_IND, BPF_W), - DL(BPF_LD, BPF_IND, BPF_H), - DL(BPF_LD, BPF_IND, BPF_B), -#undef DL - }; - - regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; - regs[ARG1_REG] = (u64) (unsigned long) ctx; - -select_insn: - goto *jumptable[insn->code]; - - /* ALU */ -#define ALU(OPCODE, OP) \ - BPF_ALU64_##OPCODE##_BPF_X: \ - A = A OP X; \ - CONT; \ - BPF_ALU_##OPCODE##_BPF_X: \ - A = (u32) A OP (u32) X; \ - CONT; \ - BPF_ALU64_##OPCODE##_BPF_K: \ - A = A OP K; \ - CONT; \ - BPF_ALU_##OPCODE##_BPF_K: \ - A = (u32) A OP (u32) K; \ - CONT; - - ALU(BPF_ADD, +) - ALU(BPF_SUB, -) - ALU(BPF_AND, &) - ALU(BPF_OR, |) - ALU(BPF_LSH, <<) - ALU(BPF_RSH, >>) - ALU(BPF_XOR, ^) - ALU(BPF_MUL, *) -#undef ALU - BPF_ALU_BPF_NEG_0: - A = (u32) -A; - CONT; - BPF_ALU64_BPF_NEG_0: - A = -A; - CONT; - BPF_ALU_BPF_MOV_BPF_X: - A = (u32) X; - CONT; - BPF_ALU_BPF_MOV_BPF_K: - A = (u32) K; - CONT; - BPF_ALU64_BPF_MOV_BPF_X: - A = X; - CONT; - BPF_ALU64_BPF_MOV_BPF_K: - A = K; - CONT; - BPF_ALU64_BPF_ARSH_BPF_X: - (*(s64 *) &A) >>= X; - CONT; - BPF_ALU64_BPF_ARSH_BPF_K: - (*(s64 *) &A) >>= K; - CONT; - BPF_ALU64_BPF_MOD_BPF_X: - if (unlikely(X == 0)) - return 0; - tmp = A; - A = do_div(tmp, X); - CONT; - BPF_ALU_BPF_MOD_BPF_X: - if (unlikely(X == 0)) - return 0; - tmp = (u32) A; - A = do_div(tmp, (u32) X); - CONT; - BPF_ALU64_BPF_MOD_BPF_K: - tmp = A; - A = do_div(tmp, K); - CONT; - BPF_ALU_BPF_MOD_BPF_K: - tmp = (u32) A; - A = do_div(tmp, (u32) K); - CONT; - BPF_ALU64_BPF_DIV_BPF_X: - if (unlikely(X == 0)) - return 0; - do_div(A, X); - CONT; - BPF_ALU_BPF_DIV_BPF_X: - if (unlikely(X == 0)) - return 0; - tmp = (u32) A; - do_div(tmp, (u32) X); - A = (u32) tmp; - CONT; - BPF_ALU64_BPF_DIV_BPF_K: - do_div(A, K); - CONT; - BPF_ALU_BPF_DIV_BPF_K: - tmp = (u32) A; - do_div(tmp, (u32) K); - A = (u32) tmp; - CONT; - BPF_ALU_BPF_END_BPF_TO_BE: - switch (K) { - case 16: - A = (__force u16) cpu_to_be16(A); - break; - case 32: - A = (__force u32) cpu_to_be32(A); - break; - case 64: - A = (__force u64) cpu_to_be64(A); - break; - } - CONT; - BPF_ALU_BPF_END_BPF_TO_LE: - switch (K) { - case 16: - A = (__force u16) cpu_to_le16(A); - break; - case 32: - A = (__force u32) cpu_to_le32(A); - break; - case 64: - A = (__force u64) cpu_to_le64(A); - break; - } - CONT; - - /* CALL */ - BPF_JMP_BPF_CALL_0: - /* Function call scratches R1-R5 registers, preserves R6-R9, - * and stores return value into R0. - */ - R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3], - regs[4], regs[5]); - CONT; - - /* JMP */ - BPF_JMP_BPF_JA_0: - insn += insn->off; - CONT; - BPF_JMP_BPF_JEQ_BPF_X: - if (A == X) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JEQ_BPF_K: - if (A == K) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JNE_BPF_X: - if (A != X) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JNE_BPF_K: - if (A != K) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JGT_BPF_X: - if (A > X) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JGT_BPF_K: - if (A > K) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JGE_BPF_X: - if (A >= X) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JGE_BPF_K: - if (A >= K) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JSGT_BPF_X: - if (((s64)A) > ((s64)X)) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JSGT_BPF_K: - if (((s64)A) > ((s64)K)) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JSGE_BPF_X: - if (((s64)A) >= ((s64)X)) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JSGE_BPF_K: - if (((s64)A) >= ((s64)K)) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JSET_BPF_X: - if (A & X) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_JSET_BPF_K: - if (A & K) { - insn += insn->off; - CONT_JMP; - } - CONT; - BPF_JMP_BPF_EXIT_0: - return R0; - - /* STX and ST and LDX*/ -#define LDST(SIZEOP, SIZE) \ - BPF_STX_BPF_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = X; \ - CONT; \ - BPF_ST_BPF_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = K; \ - CONT; \ - BPF_LDX_BPF_MEM_##SIZEOP: \ - A = *(SIZE *)(unsigned long) (X + insn->off); \ - CONT; - - LDST(BPF_B, u8) - LDST(BPF_H, u16) - LDST(BPF_W, u32) - LDST(BPF_DW, u64) -#undef LDST - BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */ - atomic_add((u32) X, (atomic_t *)(unsigned long) - (A + insn->off)); - CONT; - BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ - atomic64_add((u64) X, (atomic64_t *)(unsigned long) - (A + insn->off)); - CONT; - BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ - off = K; -load_word: - /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only - * appearing in the programs where ctx == skb. All programs - * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter() - * saves it in R6, internal BPF verifier will check that - * R6 == ctx. - * - * BPF_ABS and BPF_IND are wrappers of function calls, so - * they scratch R1-R5 registers, preserve R6-R9, and store - * return value into R0. - * - * Implicit input: - * ctx - * - * Explicit input: - * X == any register - * K == 32-bit immediate - * - * Output: - * R0 - 8/16/32-bit skb data converted to cpu endianness - */ - ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); - if (likely(ptr != NULL)) { - R0 = get_unaligned_be32(ptr); - CONT; - } - return 0; - BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ - off = K; -load_half: - ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); - if (likely(ptr != NULL)) { - R0 = get_unaligned_be16(ptr); - CONT; - } - return 0; - BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */ - off = K; -load_byte: - ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); - if (likely(ptr != NULL)) { - R0 = *(u8 *)ptr; - CONT; - } - return 0; - BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ - off = K + X; - goto load_word; - BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ - off = K + X; - goto load_half; - BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */ - off = K + X; - goto load_byte; - - default_label: - /* If we ever reach this, we have a bug somewhere. */ - WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); - return 0; -#undef CONT_JMP -#undef CONT - -#undef R0 -#undef X -#undef A -#undef K -} - -u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx, - const struct sock_filter_int *insni) - __attribute__ ((alias ("__sk_run_filter"))); - -u32 sk_run_filter_int_skb(const struct sk_buff *ctx, - const struct sock_filter_int *insni) - __attribute__ ((alias ("__sk_run_filter"))); -EXPORT_SYMBOL_GPL(sk_run_filter_int_skb); - /* Helper to find the offset of pkt_type in sk_buff structure. We want * to make sure its still a 3bit field starting at a byte boundary; * taken from arch/x86/net/bpf_jit_comp.c. */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else #define PKT_TYPE_MAX 7 +#endif static unsigned int pkt_type_offset(void) { struct sk_buff skb_probe = { .pkt_type = ~0, }; @@ -585,16 +111,14 @@ static unsigned int pkt_type_offset(void) return -1; } -static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - struct sk_buff *skb = (struct sk_buff *)(long) ctx; - - return __skb_get_poff(skb); + return __skb_get_poff((struct sk_buff *)(unsigned long) ctx); } -static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - struct sk_buff *skb = (struct sk_buff *)(long) ctx; + struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -603,19 +127,19 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb->len < sizeof(struct nlattr)) return 0; - if (A > skb->len - sizeof(struct nlattr)) + if (a > skb->len - sizeof(struct nlattr)) return 0; - nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X); + nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x); if (nla) return (void *) nla - (void *) skb->data; return 0; } -static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - struct sk_buff *skb = (struct sk_buff *)(long) ctx; + struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -624,154 +148,117 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb->len < sizeof(struct nlattr)) return 0; - if (A > skb->len - sizeof(struct nlattr)) + if (a > skb->len - sizeof(struct nlattr)) return 0; - nla = (struct nlattr *) &skb->data[A]; - if (nla->nla_len > skb->len - A) + nla = (struct nlattr *) &skb->data[a]; + if (nla->nla_len > skb->len - a) return 0; - nla = nla_find_nested(nla, X); + nla = nla_find_nested(nla, x); if (nla) return (void *) nla - (void *) skb->data; return 0; } -static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return raw_smp_processor_id(); } -/* Register mappings for user programs. */ -#define A_REG 0 -#define X_REG 7 -#define TMP_REG 8 -#define ARG2_REG 2 -#define ARG3_REG 3 +/* note that this only generates 32-bit random numbers */ +static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +{ + return prandom_u32(); +} static bool convert_bpf_extensions(struct sock_filter *fp, - struct sock_filter_int **insnp) + struct bpf_insn **insnp) { - struct sock_filter_int *insn = *insnp; + struct bpf_insn *insn = *insnp; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, protocol); - insn++; - + /* A = *(u16 *) (CTX + offsetof(protocol)) */ + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, protocol)); /* A = ntohs(A) [emitting a nop or swap16] */ - insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; - insn->a_reg = A_REG; - insn->imm = 16; + *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - insn->code = BPF_LDX | BPF_MEM | BPF_B; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = pkt_type_offset(); + *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, + pkt_type_offset()); if (insn->off < 0) return false; insn++; - - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; - insn->imm = PKT_TYPE_MAX; + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + insn++; + *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5); +#endif break; case SKF_AD_OFF + SKF_AD_IFINDEX: case SKF_AD_OFF + SKF_AD_HATYPE: - if (FIELD_SIZEOF(struct sk_buff, dev) == 8) - insn->code = BPF_LDX | BPF_MEM | BPF_DW; - else - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = TMP_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, dev); - insn++; - - insn->code = BPF_JMP | BPF_JNE | BPF_K; - insn->a_reg = TMP_REG; - insn->imm = 0; - insn->off = 1; - insn++; - - insn->code = BPF_JMP | BPF_EXIT; - insn++; - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; - - if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->off = offsetof(struct net_device, ifindex); - } else { - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->off = offsetof(struct net_device, type); - } + BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); + + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + BPF_REG_TMP, BPF_REG_CTX, + offsetof(struct sk_buff, dev)); + /* if (tmp != 0) goto pc + 1 */ + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); + *insn++ = BPF_EXIT_INSN(); + if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP, + offsetof(struct net_device, ifindex)); + else + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP, + offsetof(struct net_device, type)); break; case SKF_AD_OFF + SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, mark); + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, mark)); break; case SKF_AD_OFF + SKF_AD_RXHASH: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, hash); + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, hash)); break; case SKF_AD_OFF + SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, queue_mapping); + *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, queue_mapping)); break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - - insn->code = BPF_LDX | BPF_MEM | BPF_H; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, vlan_tci); - insn++; - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, vlan_tci)); if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; - insn->imm = ~VLAN_TAG_PRESENT; + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, + ~VLAN_TAG_PRESENT); } else { - insn->code = BPF_ALU | BPF_RSH | BPF_K; - insn->a_reg = A_REG; - insn->imm = 12; - insn++; - - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; - insn->imm = 1; + /* A >>= 12 */ + *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); + /* A &= 1 */ + *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); } break; @@ -779,46 +266,36 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_NLATTR: case SKF_AD_OFF + SKF_AD_NLATTR_NEST: case SKF_AD_OFF + SKF_AD_CPU: - /* arg1 = ctx */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG1_REG; - insn->x_reg = CTX_REG; - insn++; - + case SKF_AD_OFF + SKF_AD_RANDOM: + /* arg1 = CTX */ + *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); /* arg2 = A */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG2_REG; - insn->x_reg = A_REG; - insn++; - + *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A); /* arg3 = X */ - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = ARG3_REG; - insn->x_reg = X_REG; - insn++; - - /* Emit call(ctx, arg2=A, arg3=X) */ - insn->code = BPF_JMP | BPF_CALL; + *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X); + /* Emit call(arg1=CTX, arg2=A, arg3=X) */ switch (fp->k) { case SKF_AD_OFF + SKF_AD_PAY_OFFSET: - insn->imm = __skb_get_pay_offset - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_pay_offset); break; case SKF_AD_OFF + SKF_AD_NLATTR: - insn->imm = __skb_get_nlattr - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_nlattr); break; case SKF_AD_OFF + SKF_AD_NLATTR_NEST: - insn->imm = __skb_get_nlattr_nest - __bpf_call_base; + *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest); break; case SKF_AD_OFF + SKF_AD_CPU: - insn->imm = __get_raw_cpu_id - __bpf_call_base; + *insn = BPF_EMIT_CALL(__get_raw_cpu_id); + break; + case SKF_AD_OFF + SKF_AD_RANDOM: + *insn = BPF_EMIT_CALL(__get_random_u32); break; } break; case SKF_AD_OFF + SKF_AD_ALU_XOR_X: - insn->code = BPF_ALU | BPF_XOR | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + /* A ^= X */ + *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X); break; default: @@ -835,7 +312,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, } /** - * sk_convert_filter - convert filter program + * bpf_convert_filter - convert filter program * @prog: the user passed filter program * @len: the length of the user passed filter program * @new_prog: buffer where converted program will be stored @@ -845,12 +322,12 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * Conversion workflow: * * 1) First pass for calculating the new program length: - * sk_convert_filter(old_prog, old_len, NULL, &new_len) + * bpf_convert_filter(old_prog, old_len, NULL, &new_len) * * 2) 2nd pass to remap in two passes: 1st pass finds new * jump offsets, 2nd pass remapping: - * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len); - * sk_convert_filter(old_prog, old_len, new_prog, &new_len); + * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); + * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); * * User BPF's register A is mapped to our BPF register 6, user BPF * register X is mapped to BPF register 7; frame pointer is always @@ -858,23 +335,23 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * for socket filters: ctx == 'struct sk_buff *', for seccomp: * ctx == 'struct seccomp_data *'. */ -int sk_convert_filter(struct sock_filter *prog, int len, - struct sock_filter_int *new_prog, int *new_len) +int bpf_convert_filter(struct sock_filter *prog, int len, + struct bpf_insn *new_prog, int *new_len) { int new_flen = 0, pass = 0, target, i; - struct sock_filter_int *new_insn; + struct bpf_insn *new_insn; struct sock_filter *fp; int *addrs = NULL; u8 bpf_src; BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); - BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG); + BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); - if (len <= 0 || len >= BPF_MAXINSNS) + if (len <= 0 || len > BPF_MAXINSNS) return -EINVAL; if (new_prog) { - addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL); + addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL); if (!addrs) return -ENOMEM; } @@ -883,16 +360,13 @@ do_pass: new_insn = new_prog; fp = prog; - if (new_insn) { - new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - new_insn->a_reg = CTX_REG; - new_insn->x_reg = ARG1_REG; - } + if (new_insn) + *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); new_insn++; for (i = 0; i < len; fp++, i++) { - struct sock_filter_int tmp_insns[6] = { }; - struct sock_filter_int *insn = tmp_insns; + struct bpf_insn tmp_insns[6] = { }; + struct bpf_insn *insn = tmp_insns; if (addrs) addrs[i] = new_insn - new_prog; @@ -935,17 +409,16 @@ do_pass: convert_bpf_extensions(fp, &insn)) break; - insn->code = fp->code; - insn->a_reg = A_REG; - insn->x_reg = X_REG; - insn->imm = fp->k; + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; - /* Jump opcodes map as-is, but offsets need adjustment. */ - case BPF_JMP | BPF_JA: - target = i + fp->k + 1; - insn->code = fp->code; -#define EMIT_JMP \ + /* Jump transformation cannot use BPF block macros + * everywhere as offset calculation and target updates + * require a bit more work than the rest, i.e. jump + * opcodes map as-is, but offsets need adjustment. + */ + +#define BPF_EMIT_JMP \ do { \ if (target >= len || target < 0) \ goto err; \ @@ -954,7 +427,10 @@ do_pass: insn->off -= insn - tmp_insns; \ } while (0) - EMIT_JMP; + case BPF_JMP | BPF_JA: + target = i + fp->k + 1; + insn->code = fp->code; + BPF_EMIT_JMP; break; case BPF_JMP | BPF_JEQ | BPF_K: @@ -970,17 +446,14 @@ do_pass: * immediate into tmp register and use it * in compare insn. */ - insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = TMP_REG; - insn->imm = fp->k; - insn++; + *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + insn->dst_reg = BPF_REG_A; + insn->src_reg = BPF_REG_TMP; bpf_src = BPF_X; } else { - insn->a_reg = A_REG; - insn->x_reg = X_REG; + insn->dst_reg = BPF_REG_A; + insn->src_reg = BPF_REG_X; insn->imm = fp->k; bpf_src = BPF_SRC(fp->code); } @@ -989,7 +462,7 @@ do_pass: if (fp->jf == 0) { insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; target = i + fp->jt + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; } @@ -997,127 +470,94 @@ do_pass: if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { insn->code = BPF_JMP | BPF_JNE | bpf_src; target = i + fp->jf + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; } /* Other jumps are mapped into two insns: Jxx and JA. */ target = i + fp->jt + 1; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; - EMIT_JMP; + BPF_EMIT_JMP; insn++; insn->code = BPF_JMP | BPF_JA; target = i + fp->jf + 1; - EMIT_JMP; + BPF_EMIT_JMP; break; /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = TMP_REG; - insn->x_reg = A_REG; - insn++; - - insn->code = BPF_LD | BPF_ABS | BPF_B; - insn->a_reg = A_REG; - insn->imm = fp->k; - insn++; - - insn->code = BPF_ALU | BPF_AND | BPF_K; - insn->a_reg = A_REG; - insn->imm = 0xf; - insn++; - - insn->code = BPF_ALU | BPF_LSH | BPF_K; - insn->a_reg = A_REG; - insn->imm = 2; - insn++; - - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = X_REG; - insn->x_reg = A_REG; - insn++; - - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = TMP_REG; + /* tmp = A */ + *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); + /* A = BPF_R0 = *(u8 *) (skb->data + K) */ + *insn++ = BPF_LD_ABS(BPF_B, fp->k); + /* A &= 0xf */ + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); + /* A <<= 2 */ + *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); + /* X = A */ + *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); + /* A = tmp */ + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); break; /* RET_K, RET_A are remaped into 2 insns. */ case BPF_RET | BPF_A: case BPF_RET | BPF_K: - insn->code = BPF_ALU | BPF_MOV | - (BPF_RVAL(fp->code) == BPF_K ? - BPF_K : BPF_X); - insn->a_reg = 0; - insn->x_reg = A_REG; - insn->imm = fp->k; - insn++; - - insn->code = BPF_JMP | BPF_EXIT; + *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? + BPF_K : BPF_X, BPF_REG_0, + BPF_REG_A, fp->k); + *insn = BPF_EXIT_INSN(); break; /* Store to stack. */ case BPF_ST: case BPF_STX: - insn->code = BPF_STX | BPF_MEM | BPF_W; - insn->a_reg = FP_REG; - insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG; - insn->off = -(BPF_MEMWORDS - fp->k) * 4; + *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == + BPF_ST ? BPF_REG_A : BPF_REG_X, + -(BPF_MEMWORDS - fp->k) * 4); break; /* Load from stack. */ case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->x_reg = FP_REG; - insn->off = -(BPF_MEMWORDS - fp->k) * 4; + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_FP, + -(BPF_MEMWORDS - fp->k) * 4); break; /* A = K or X = K */ case BPF_LD | BPF_IMM: case BPF_LDX | BPF_IMM: - insn->code = BPF_ALU | BPF_MOV | BPF_K; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->imm = fp->k; + *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, fp->k); break; /* X = A */ case BPF_MISC | BPF_TAX: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = X_REG; - insn->x_reg = A_REG; + *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); break; /* A = X */ case BPF_MISC | BPF_TXA: - insn->code = BPF_ALU64 | BPF_MOV | BPF_X; - insn->a_reg = A_REG; - insn->x_reg = X_REG; + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X); break; /* A = skb->len or X = skb->len */ case BPF_LD | BPF_W | BPF_LEN: case BPF_LDX | BPF_W | BPF_LEN: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? - A_REG : X_REG; - insn->x_reg = CTX_REG; - insn->off = offsetof(struct sk_buff, len); + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_CTX, + offsetof(struct sk_buff, len)); break; - /* access seccomp_data fields */ + /* Access seccomp_data fields. */ case BPF_LDX | BPF_ABS | BPF_W: - insn->code = BPF_LDX | BPF_MEM | BPF_W; - insn->a_reg = A_REG; - insn->x_reg = CTX_REG; - insn->off = fp->k; + /* A = *(u32 *) (ctx + K) */ + *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); break; + /* Unkown instruction. */ default: goto err; } @@ -1126,7 +566,6 @@ do_pass: if (new_prog) memcpy(new_insn, tmp_insns, sizeof(*insn) * (insn - tmp_insns)); - new_insn += insn - tmp_insns; } @@ -1141,7 +580,6 @@ do_pass: new_flen = new_insn - new_prog; if (pass > 2) goto err; - goto do_pass; } @@ -1163,46 +601,48 @@ err: * a cell if not previously written, and we check all branches to be sure * a malicious user doesn't try to abuse us. */ -static int check_load_and_stores(struct sock_filter *filter, int flen) +static int check_load_and_stores(const struct sock_filter *filter, int flen) { - u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ + u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ int pc, ret = 0; BUILD_BUG_ON(BPF_MEMWORDS > 16); - masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); + + masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL); if (!masks) return -ENOMEM; + memset(masks, 0xff, flen * sizeof(*masks)); for (pc = 0; pc < flen; pc++) { memvalid &= masks[pc]; switch (filter[pc].code) { - case BPF_S_ST: - case BPF_S_STX: + case BPF_ST: + case BPF_STX: memvalid |= (1 << filter[pc].k); break; - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: if (!(memvalid & (1 << filter[pc].k))) { ret = -EINVAL; goto error; } break; - case BPF_S_JMP_JA: - /* a jump must set masks on target */ + case BPF_JMP | BPF_JA: + /* A jump must set masks on target */ masks[pc + 1 + filter[pc].k] &= memvalid; memvalid = ~0; break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_X: - case BPF_S_JMP_JSET_K: - /* a jump must set masks on targets */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* A jump must set masks on targets */ masks[pc + 1 + filter[pc].jt] &= memvalid; masks[pc + 1 + filter[pc].jf] &= memvalid; memvalid = ~0; @@ -1214,8 +654,74 @@ error: return ret; } +static bool chk_code_allowed(u16 code_to_probe) +{ + static const bool codes[] = { + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_K] = true, + [BPF_ALU | BPF_ADD | BPF_X] = true, + [BPF_ALU | BPF_SUB | BPF_K] = true, + [BPF_ALU | BPF_SUB | BPF_X] = true, + [BPF_ALU | BPF_MUL | BPF_K] = true, + [BPF_ALU | BPF_MUL | BPF_X] = true, + [BPF_ALU | BPF_DIV | BPF_K] = true, + [BPF_ALU | BPF_DIV | BPF_X] = true, + [BPF_ALU | BPF_MOD | BPF_K] = true, + [BPF_ALU | BPF_MOD | BPF_X] = true, + [BPF_ALU | BPF_AND | BPF_K] = true, + [BPF_ALU | BPF_AND | BPF_X] = true, + [BPF_ALU | BPF_OR | BPF_K] = true, + [BPF_ALU | BPF_OR | BPF_X] = true, + [BPF_ALU | BPF_XOR | BPF_K] = true, + [BPF_ALU | BPF_XOR | BPF_X] = true, + [BPF_ALU | BPF_LSH | BPF_K] = true, + [BPF_ALU | BPF_LSH | BPF_X] = true, + [BPF_ALU | BPF_RSH | BPF_K] = true, + [BPF_ALU | BPF_RSH | BPF_X] = true, + [BPF_ALU | BPF_NEG] = true, + /* Load instructions */ + [BPF_LD | BPF_W | BPF_ABS] = true, + [BPF_LD | BPF_H | BPF_ABS] = true, + [BPF_LD | BPF_B | BPF_ABS] = true, + [BPF_LD | BPF_W | BPF_LEN] = true, + [BPF_LD | BPF_W | BPF_IND] = true, + [BPF_LD | BPF_H | BPF_IND] = true, + [BPF_LD | BPF_B | BPF_IND] = true, + [BPF_LD | BPF_IMM] = true, + [BPF_LD | BPF_MEM] = true, + [BPF_LDX | BPF_W | BPF_LEN] = true, + [BPF_LDX | BPF_B | BPF_MSH] = true, + [BPF_LDX | BPF_IMM] = true, + [BPF_LDX | BPF_MEM] = true, + /* Store instructions */ + [BPF_ST] = true, + [BPF_STX] = true, + /* Misc instructions */ + [BPF_MISC | BPF_TAX] = true, + [BPF_MISC | BPF_TXA] = true, + /* Return instructions */ + [BPF_RET | BPF_K] = true, + [BPF_RET | BPF_A] = true, + /* Jump instructions */ + [BPF_JMP | BPF_JA] = true, + [BPF_JMP | BPF_JEQ | BPF_K] = true, + [BPF_JMP | BPF_JEQ | BPF_X] = true, + [BPF_JMP | BPF_JGE | BPF_K] = true, + [BPF_JMP | BPF_JGE | BPF_X] = true, + [BPF_JMP | BPF_JGT | BPF_K] = true, + [BPF_JMP | BPF_JGT | BPF_X] = true, + [BPF_JMP | BPF_JSET | BPF_K] = true, + [BPF_JMP | BPF_JSET | BPF_X] = true, + }; + + if (code_to_probe >= ARRAY_SIZE(codes)) + return false; + + return codes[code_to_probe]; +} + /** - * sk_chk_filter - verify socket filter code + * bpf_check_classic - verify socket filter code * @filter: filter to verify * @flen: length of filter * @@ -1228,163 +734,86 @@ error: * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int sk_chk_filter(struct sock_filter *filter, unsigned int flen) +int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) { - /* - * Valid instructions are initialized to non-0. - * Invalid instructions are initialized to 0. - */ - static const u8 codes[] = { - [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K, - [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X, - [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K, - [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X, - [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, - [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, - [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, - [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K, - [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X, - [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, - [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, - [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, - [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, - [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K, - [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X, - [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, - [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, - [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, - [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X, - [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG, - [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS, - [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS, - [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS, - [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN, - [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND, - [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND, - [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND, - [BPF_LD|BPF_IMM] = BPF_S_LD_IMM, - [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN, - [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH, - [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM, - [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX, - [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA, - [BPF_RET|BPF_K] = BPF_S_RET_K, - [BPF_RET|BPF_A] = BPF_S_RET_A, - [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K, - [BPF_LD|BPF_MEM] = BPF_S_LD_MEM, - [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM, - [BPF_ST] = BPF_S_ST, - [BPF_STX] = BPF_S_STX, - [BPF_JMP|BPF_JA] = BPF_S_JMP_JA, - [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K, - [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X, - [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K, - [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X, - [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K, - [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X, - [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K, - [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, - }; - int pc; bool anc_found; + int pc; if (flen == 0 || flen > BPF_MAXINSNS) return -EINVAL; - /* check the filter code now */ + /* Check the filter code now */ for (pc = 0; pc < flen; pc++) { - struct sock_filter *ftest = &filter[pc]; - u16 code = ftest->code; + const struct sock_filter *ftest = &filter[pc]; - if (code >= ARRAY_SIZE(codes)) - return -EINVAL; - code = codes[code]; - if (!code) + /* May we actually operate on this code? */ + if (!chk_code_allowed(ftest->code)) return -EINVAL; + /* Some instructions need special checks */ - switch (code) { - case BPF_S_ALU_DIV_K: - case BPF_S_ALU_MOD_K: - /* check for division by zero */ + switch (ftest->code) { + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + /* Check for division by zero */ if (ftest->k == 0) return -EINVAL; break; - case BPF_S_LD_MEM: - case BPF_S_LDX_MEM: - case BPF_S_ST: - case BPF_S_STX: - /* check for invalid memory addresses */ + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + case BPF_ST: + case BPF_STX: + /* Check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; break; - case BPF_S_JMP_JA: - /* - * Note, the large ftest->k might cause loops. + case BPF_JMP | BPF_JA: + /* Note, the large ftest->k might cause loops. * Compare this with conditional jumps below, * where offsets are limited. --ANK (981016) */ - if (ftest->k >= (unsigned int)(flen-pc-1)) + if (ftest->k >= (unsigned int)(flen - pc - 1)) return -EINVAL; break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JSET_X: - case BPF_S_JMP_JSET_K: - /* for conditionals both must be safe */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + /* Both conditionals must be safe */ if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; break; - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: anc_found = false; -#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ - code = BPF_S_ANC_##CODE; \ - anc_found = true; \ - break - switch (ftest->k) { - ANCILLARY(PROTOCOL); - ANCILLARY(PKTTYPE); - ANCILLARY(IFINDEX); - ANCILLARY(NLATTR); - ANCILLARY(NLATTR_NEST); - ANCILLARY(MARK); - ANCILLARY(QUEUE); - ANCILLARY(HATYPE); - ANCILLARY(RXHASH); - ANCILLARY(CPU); - ANCILLARY(ALU_XOR_X); - ANCILLARY(VLAN_TAG); - ANCILLARY(VLAN_TAG_PRESENT); - ANCILLARY(PAY_OFFSET); - } - - /* ancillary operation unknown or unsupported */ + if (bpf_anc_helper(ftest) & BPF_ANC) + anc_found = true; + /* Ancillary operation unknown or unsupported */ if (anc_found == false && ftest->k >= SKF_AD_OFF) return -EINVAL; } - ftest->code = code; } - /* last instruction must be a RET code */ + /* Last instruction must be a RET code */ switch (filter[flen - 1].code) { - case BPF_S_RET_K: - case BPF_S_RET_A: + case BPF_RET | BPF_K: + case BPF_RET | BPF_A: return check_load_and_stores(filter, flen); } + return -EINVAL; } -EXPORT_SYMBOL(sk_chk_filter); +EXPORT_SYMBOL(bpf_check_classic); -static int sk_store_orig_filter(struct sk_filter *fp, - const struct sock_fprog *fprog) +static int bpf_prog_store_orig_filter(struct bpf_prog *fp, + const struct sock_fprog *fprog) { - unsigned int fsize = sk_filter_proglen(fprog); + unsigned int fsize = bpf_classic_proglen(fprog); struct sock_fprog_kern *fkprog; fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL); @@ -1402,7 +831,7 @@ static int sk_store_orig_filter(struct sk_filter *fp, return 0; } -static void sk_release_orig_filter(struct sk_filter *fp) +static void bpf_release_orig_filter(struct bpf_prog *fp) { struct sock_fprog_kern *fprog = fp->orig_prog; @@ -1412,6 +841,18 @@ static void sk_release_orig_filter(struct sk_filter *fp) } } +static void __bpf_prog_release(struct bpf_prog *prog) +{ + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + +static void __sk_filter_release(struct sk_filter *fp) +{ + __bpf_prog_release(fp->prog); + kfree(fp); +} + /** * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free @@ -1420,8 +861,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); - sk_release_orig_filter(fp); - bpf_jit_free(fp); + __sk_filter_release(fp); } /** @@ -1438,45 +878,34 @@ static void sk_filter_release(struct sk_filter *fp) void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { - atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); - sk_filter_release(fp); -} + u32 filter_size = bpf_prog_size(fp->prog->len); -void sk_filter_charge(struct sock *sk, struct sk_filter *fp) -{ - atomic_inc(&fp->refcnt); - atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); + atomic_sub(filter_size, &sk->sk_omem_alloc); + sk_filter_release(fp); } -static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp, - struct sock *sk, - unsigned int len) +/* try to charge the socket memory if there is space available + * return true on success + */ +bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { - struct sk_filter *fp_new; - - if (sk == NULL) - return krealloc(fp, len, GFP_KERNEL); - - fp_new = sock_kmalloc(sk, len, GFP_KERNEL); - if (fp_new) { - memcpy(fp_new, fp, sizeof(struct sk_filter)); - /* As we're kepping orig_prog in fp_new along, - * we need to make sure we're not evicting it - * from the old fp. - */ - fp->orig_prog = NULL; - sk_filter_uncharge(sk, fp); + u32 filter_size = bpf_prog_size(fp->prog->len); + + /* same check as in sock_kmalloc() */ + if (filter_size <= sysctl_optmem_max && + atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { + atomic_inc(&fp->refcnt); + atomic_add(filter_size, &sk->sk_omem_alloc); + return true; } - - return fp_new; + return false; } -static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, - struct sock *sk) +static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) { struct sock_filter *old_prog; - struct sk_filter *old_fp; - int i, err, new_len, old_len = fp->len; + struct bpf_prog *old_fp; + int err, new_len, old_len = fp->len; /* We are free to overwrite insns et al right here as it * won't be used at this point in time anymore internally @@ -1484,14 +913,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, * representation. */ BUILD_BUG_ON(sizeof(struct sock_filter) != - sizeof(struct sock_filter_int)); - - /* For now, we need to unfiddle BPF_S_* identifiers in place. - * This can sooner or later on be subject to removal, e.g. when - * JITs have been converted. - */ - for (i = 0; i < fp->len; i++) - sk_decode_filter(&fp->insns[i], &fp->insns[i]); + sizeof(struct bpf_insn)); /* Conversion cannot happen on overlapping memory areas, * so we need to keep the user BPF around until the 2nd @@ -1505,13 +927,13 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, } /* 1st pass: calculate the new program length. */ - err = sk_convert_filter(old_prog, old_len, NULL, &new_len); + err = bpf_convert_filter(old_prog, old_len, NULL, &new_len); if (err) goto out_err_free; /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len)); + fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. @@ -1521,44 +943,42 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, goto out_err_free; } - fp->bpf_func = sk_run_filter_int_skb; fp->len = new_len; - /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ - err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len); + /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ + err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len); if (err) - /* 2nd sk_convert_filter() can fail only if it fails + /* 2nd bpf_convert_filter() can fail only if it fails * to allocate memory, remapping must succeed. Note, * that at this time old_fp has already been released - * by __sk_migrate_realloc(). + * by krealloc(). */ goto out_err_free; + bpf_prog_select_runtime(fp); + kfree(old_prog); return fp; out_err_free: kfree(old_prog); out_err: - /* Rollback filter setup. */ - if (sk != NULL) - sk_filter_uncharge(sk, fp); - else - kfree(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } -static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, - struct sock *sk) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) { int err; fp->bpf_func = NULL; fp->jited = 0; - err = sk_chk_filter(fp->insns, fp->len); - if (err) + err = bpf_check_classic(fp->insns, fp->len); + if (err) { + __bpf_prog_release(fp); return ERR_PTR(err); + } /* Probe if we can JIT compile the filter and if so, do * the compilation of the filter. @@ -1569,38 +989,36 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, * internal BPF translation for the optimized interpreter. */ if (!fp->jited) - fp = __sk_migrate_filter(fp, sk); + fp = bpf_migrate_filter(fp); return fp; } /** - * sk_unattached_filter_create - create an unattached filter - * @fprog: the filter program + * bpf_prog_create - create an unattached filter * @pfp: the unattached filter that is created + * @fprog: the filter program * * Create a filter independent of any socket. We first run some * sanity checks on it to make sure it does not explode on us later. * If an error occurs or there is insufficient memory for the filter * a negative errno code is returned. On success the return is zero. */ -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog *fprog) +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) { - unsigned int fsize = sk_filter_proglen(fprog); - struct sk_filter *fp; + unsigned int fsize = bpf_classic_proglen(fprog); + struct bpf_prog *fp; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); + fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); - atomic_set(&fp->refcnt, 1); fp->len = fprog->len; /* Since unattached filters are not copied back to user * space through sk_get_filter(), we do not need to hold @@ -1608,23 +1026,23 @@ int sk_unattached_filter_create(struct sk_filter **pfp, */ fp->orig_prog = NULL; - /* __sk_prepare_filter() already takes care of uncharging + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp, NULL); + fp = bpf_prepare_filter(fp); if (IS_ERR(fp)) return PTR_ERR(fp); *pfp = fp; return 0; } -EXPORT_SYMBOL_GPL(sk_unattached_filter_create); +EXPORT_SYMBOL_GPL(bpf_prog_create); -void sk_unattached_filter_destroy(struct sk_filter *fp) +void bpf_prog_destroy(struct bpf_prog *fp) { - sk_filter_release(fp); + __bpf_prog_release(fp); } -EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); +EXPORT_SYMBOL_GPL(bpf_prog_destroy); /** * sk_attach_filter - attach a socket filter @@ -1639,8 +1057,9 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; - unsigned int fsize = sk_filter_proglen(fprog); - unsigned int sk_fsize = sk_filter_size(fprog->len); + unsigned int fsize = bpf_classic_proglen(fprog); + unsigned int bpf_fsize = bpf_prog_size(fprog->len); + struct bpf_prog *prog; int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -1650,30 +1069,43 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); - if (!fp) + prog = kmalloc(bpf_fsize, GFP_KERNEL); + if (!prog) return -ENOMEM; - if (copy_from_user(fp->insns, fprog->filter, fsize)) { - sock_kfree_s(sk, fp, sk_fsize); + if (copy_from_user(prog->insns, fprog->filter, fsize)) { + kfree(prog); return -EFAULT; } - atomic_set(&fp->refcnt, 1); - fp->len = fprog->len; + prog->len = fprog->len; - err = sk_store_orig_filter(fp, fprog); + err = bpf_prog_store_orig_filter(prog, fprog); if (err) { - sk_filter_uncharge(sk, fp); + kfree(prog); return -ENOMEM; } - /* __sk_prepare_filter() already takes care of uncharging + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp, sk); - if (IS_ERR(fp)) - return PTR_ERR(fp); + prog = bpf_prepare_filter(prog); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) { + __bpf_prog_release(prog); + return -ENOMEM; + } + fp->prog = prog; + + atomic_set(&fp->refcnt, 0); + + if (!sk_filter_charge(sk, fp)) { + __sk_filter_release(fp); + return -ENOMEM; + } old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); @@ -1706,83 +1138,6 @@ int sk_detach_filter(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_detach_filter); -void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) -{ - static const u16 decodes[] = { - [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, - [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, - [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, - [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, - [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, - [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, - [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, - [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, - [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, - [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, - [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, - [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, - [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, - [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, - [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, - [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, - [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, - [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, - [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, - [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, - [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, - [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, - [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, - [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, - [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, - [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, - [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, - [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, - [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, - [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, - [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, - [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, - [BPF_S_RET_K] = BPF_RET|BPF_K, - [BPF_S_RET_A] = BPF_RET|BPF_A, - [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, - [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, - [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, - [BPF_S_ST] = BPF_ST, - [BPF_S_STX] = BPF_STX, - [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, - [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, - [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, - [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, - [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, - [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, - [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, - [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, - [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, - }; - u16 code; - - code = filt->code; - - to->code = decodes[code]; - to->jt = filt->jt; - to->jf = filt->jf; - to->k = filt->k; -} - int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) { @@ -1799,7 +1154,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, /* We're copying the filter that has been originally attached, * so no conversion/decode needed anymore. */ - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; ret = fprog->len; if (!len) @@ -1811,7 +1166,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, goto out; ret = -EFAULT; - if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog))) + if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog))) goto out; /* Instead of bytes, the API requests to return the number diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 107ed12a5323..5f362c1d0332 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -80,6 +80,8 @@ ip: case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; + __be32 flow_label; + ipv6: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); if (!iph) @@ -89,6 +91,21 @@ ipv6: flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); + + flow_label = ip6_flowlabel(iph); + if (flow_label) { + /* Awesome, IPv6 packet has a flow label so we can + * use that to represent the ports without any + * further dissection. + */ + flow->n_proto = proto; + flow->ip_proto = ip_proto; + flow->ports = flow_label; + flow->thoff = (u16)nhoff; + + return true; + } + break; } case htons(ETH_P_8021AD): @@ -175,6 +192,7 @@ ipv6: break; } + flow->n_proto = proto; flow->ip_proto = ip_proto; flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); flow->thoff = (u16) nhoff; @@ -195,12 +213,33 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) return jhash_3words(a, b, c, hashrnd); } -static __always_inline u32 __flow_hash_1word(u32 a) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys) { - __flow_hash_secret_init(); - return jhash_1word(a, hashrnd); + u32 hash; + + /* get a consistent hash (same value on both flow directions) */ + if (((__force u32)keys->dst < (__force u32)keys->src) || + (((__force u32)keys->dst == (__force u32)keys->src) && + ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { + swap(keys->dst, keys->src); + swap(keys->port16[0], keys->port16[1]); + } + + hash = __flow_hash_3words((__force u32)keys->dst, + (__force u32)keys->src, + (__force u32)keys->ports); + if (!hash) + hash = 1; + + return hash; } +u32 flow_hash_from_keys(struct flow_keys *keys) +{ + return __flow_hash_from_keys(keys); +} +EXPORT_SYMBOL(flow_hash_from_keys); + /* * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value @@ -210,7 +249,6 @@ static __always_inline u32 __flow_hash_1word(u32 a) void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; - u32 hash; if (!skb_flow_dissect(skb, &keys)) return; @@ -218,21 +256,9 @@ void __skb_get_hash(struct sk_buff *skb) if (keys.ports) skb->l4_hash = 1; - /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys.dst < (__force u32)keys.src) || - (((__force u32)keys.dst == (__force u32)keys.src) && - ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { - swap(keys.dst, keys.src); - swap(keys.port16[0], keys.port16[1]); - } - - hash = __flow_hash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports); - if (!hash) - hash = 1; + skb->sw_hash = 1; - skb->hash = hash; + skb->hash = __flow_hash_from_keys(&keys); } EXPORT_SYMBOL(__skb_get_hash); @@ -240,7 +266,7 @@ EXPORT_SYMBOL(__skb_get_hash); * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. */ -u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, unsigned int num_tx_queues) { u32 hash; @@ -260,13 +286,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, qcount = dev->tc_to_txq[tc].count; } - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol; - hash = __flow_hash_1word(hash); - - return (u16) (((u64) hash * qcount) >> 32) + qoffset; + return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset; } EXPORT_SYMBOL(__skb_tx_hash); @@ -338,17 +358,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) if (map) { if (map->len == 1) queue_index = map->queues[0]; - else { - u32 hash; - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol ^ - skb->hash; - hash = __flow_hash_1word(hash); + else queue_index = map->queues[ - ((u64)hash * map->len) >> 32]; - } + ((u64)skb_get_hash(skb) * map->len) >> 32]; + if (unlikely(queue_index >= dev->real_num_tx_queues)) queue_index = -1; } diff --git a/net/core/iovec.c b/net/core/iovec.c index b61869429f4c..e1ec45ab1e63 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a { int size, ct, err; - if (m->msg_namelen) { + if (m->msg_name && m->msg_namelen) { if (mode == VERIFY_READ) { void __user *namep; namep = (void __user __force *) m->msg_name; @@ -48,10 +48,10 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - if (m->msg_name) - m->msg_name = address; + m->msg_name = address; } else { m->msg_name = NULL; + m->msg_namelen = 0; } size = m->msg_iovlen * sizeof(struct iovec); @@ -75,61 +75,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a } /* - * Copy kernel to iovec. Returns -EFAULT on error. - */ - -int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, - int offset, int len) -{ - int copy; - for (; len > 0; ++iov) { - /* Skip over the finished iovecs */ - if (unlikely(offset >= iov->iov_len)) { - offset -= iov->iov_len; - continue; - } - copy = min_t(unsigned int, iov->iov_len - offset, len); - if (copy_to_user(iov->iov_base + offset, kdata, copy)) - return -EFAULT; - offset = 0; - kdata += copy; - len -= copy; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_toiovecend); - -/* - * Copy iovec to kernel. Returns -EFAULT on error. - */ - -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len) -{ - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - if (copy_from_user(kdata, base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovecend); - -/* * And now for the all-in-one: copy and checksum from a user iovec * directly to a datagram * Calls to csum_partial but the last must be in 32 bit chunks diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 9c3a839322ba..bd0767e6b2b3 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -147,7 +147,7 @@ static void linkwatch_do_dev(struct net_device *dev) * Make sure the above read is complete since it can be * rewritten as soon as we clear the bit below. */ - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); /* We are about to handle this device, * so new events can be accepted diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8f8a96ef9f3f..ef31fef25e5a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1248,8 +1248,8 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_PROBE; - atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh->nud_state = NUD_INCOMPLETE; + atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); } @@ -2249,7 +2249,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = pn->flags | NTF_PROXY; - ndm->ndm_type = NDA_DST; + ndm->ndm_type = RTN_UNICAST; ndm->ndm_ifindex = pn->dev->ifindex; ndm->ndm_state = NUD_NONE; @@ -3059,11 +3059,12 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); } else { + struct neigh_table *tbl = p->tbl; dev_name_source = "default"; - t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); - t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; - t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; - t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; + t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; + t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; + t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; + t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; } if (handler) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1cac29ebb05b..9dd06699b09c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -43,12 +43,12 @@ static ssize_t netdev_show(const struct device *dev, struct device_attribute *attr, char *buf, ssize_t (*format)(const struct net_device *, char *)) { - struct net_device *net = to_net_dev(dev); + struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (dev_isalive(net)) - ret = (*format)(net, buf); + if (dev_isalive(ndev)) + ret = (*format)(ndev, buf); read_unlock(&dev_base_lock); return ret; @@ -56,9 +56,9 @@ static ssize_t netdev_show(const struct device *dev, /* generate a show function for simple field */ #define NETDEVICE_SHOW(field, format_string) \ -static ssize_t format_##field(const struct net_device *net, char *buf) \ +static ssize_t format_##field(const struct net_device *dev, char *buf) \ { \ - return sprintf(buf, format_string, net->field); \ + return sprintf(buf, format_string, dev->field); \ } \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -112,16 +112,35 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); +static ssize_t format_name_assign_type(const struct net_device *dev, char *buf) +{ + return sprintf(buf, fmt_dec, dev->name_assign_type); +} + +static ssize_t name_assign_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *ndev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (ndev->name_assign_type != NET_NAME_UNKNOWN) + ret = netdev_show(dev, attr, buf, format_name_assign_type); + + return ret; +} +static DEVICE_ATTR_RO(name_assign_type); + /* use same locking rules as GIFHWADDR ioctl's */ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct net_device *net = to_net_dev(dev); + struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (dev_isalive(net)) - ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len); + if (dev_isalive(ndev)) + ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len); read_unlock(&dev_base_lock); return ret; } @@ -130,18 +149,18 @@ static DEVICE_ATTR_RO(address); static ssize_t broadcast_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct net_device *net = to_net_dev(dev); - if (dev_isalive(net)) - return sysfs_format_mac(buf, net->broadcast, net->addr_len); + struct net_device *ndev = to_net_dev(dev); + if (dev_isalive(ndev)) + return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len); return -EINVAL; } static DEVICE_ATTR_RO(broadcast); -static int change_carrier(struct net_device *net, unsigned long new_carrier) +static int change_carrier(struct net_device *dev, unsigned long new_carrier) { - if (!netif_running(net)) + if (!netif_running(dev)) return -EINVAL; - return dev_change_carrier(net, (bool) new_carrier); + return dev_change_carrier(dev, (bool) new_carrier); } static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, @@ -265,9 +284,9 @@ static DEVICE_ATTR_RO(carrier_changes); /* read-write attributes */ -static int change_mtu(struct net_device *net, unsigned long new_mtu) +static int change_mtu(struct net_device *dev, unsigned long new_mtu) { - return dev_set_mtu(net, (int) new_mtu); + return dev_set_mtu(dev, (int) new_mtu); } static ssize_t mtu_store(struct device *dev, struct device_attribute *attr, @@ -277,9 +296,9 @@ static ssize_t mtu_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW_RW(mtu, fmt_dec); -static int change_flags(struct net_device *net, unsigned long new_flags) +static int change_flags(struct net_device *dev, unsigned long new_flags) { - return dev_change_flags(net, (unsigned int) new_flags); + return dev_change_flags(dev, (unsigned int) new_flags); } static ssize_t flags_store(struct device *dev, struct device_attribute *attr, @@ -289,9 +308,9 @@ static ssize_t flags_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW_RW(flags, fmt_hex); -static int change_tx_queue_len(struct net_device *net, unsigned long new_len) +static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) { - net->tx_queue_len = new_len; + dev->tx_queue_len = new_len; return 0; } @@ -344,9 +363,9 @@ static ssize_t ifalias_show(struct device *dev, } static DEVICE_ATTR_RW(ifalias); -static int change_group(struct net_device *net, unsigned long new_group) +static int change_group(struct net_device *dev, unsigned long new_group) { - dev_set_group(net, (int) new_group); + dev_set_group(dev, (int) new_group); return 0; } @@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, + &dev_attr_name_assign_type.attr, &dev_attr_addr_assign_type.attr, &dev_attr_addr_len.attr, &dev_attr_link_mode.attr, @@ -776,20 +796,20 @@ static struct kobj_type rx_queue_ktype = { .namespace = rx_queue_namespace }; -static int rx_queue_add_kobject(struct net_device *net, int index) +static int rx_queue_add_kobject(struct net_device *dev, int index) { - struct netdev_rx_queue *queue = net->_rx + index; + struct netdev_rx_queue *queue = dev->_rx + index; struct kobject *kobj = &queue->kobj; int error = 0; - kobj->kset = net->queues_kset; + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) goto exit; - if (net->sysfs_rx_queue_group) { - error = sysfs_create_group(kobj, net->sysfs_rx_queue_group); + if (dev->sysfs_rx_queue_group) { + error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) goto exit; } @@ -805,18 +825,18 @@ exit: #endif /* CONFIG_SYSFS */ int -net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) +net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) { #ifdef CONFIG_SYSFS int i; int error = 0; #ifndef CONFIG_RPS - if (!net->sysfs_rx_queue_group) + if (!dev->sysfs_rx_queue_group) return 0; #endif for (i = old_num; i < new_num; i++) { - error = rx_queue_add_kobject(net, i); + error = rx_queue_add_kobject(dev, i); if (error) { new_num = old_num; break; @@ -824,10 +844,10 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) } while (--i >= new_num) { - if (net->sysfs_rx_queue_group) - sysfs_remove_group(&net->_rx[i].kobj, - net->sysfs_rx_queue_group); - kobject_put(&net->_rx[i].kobj); + if (dev->sysfs_rx_queue_group) + sysfs_remove_group(&dev->_rx[i].kobj, + dev->sysfs_rx_queue_group); + kobject_put(&dev->_rx[i].kobj); } return error; @@ -1135,13 +1155,13 @@ static struct kobj_type netdev_queue_ktype = { .namespace = netdev_queue_namespace, }; -static int netdev_queue_add_kobject(struct net_device *net, int index) +static int netdev_queue_add_kobject(struct net_device *dev, int index) { - struct netdev_queue *queue = net->_tx + index; + struct netdev_queue *queue = dev->_tx + index; struct kobject *kobj = &queue->kobj; int error = 0; - kobj->kset = net->queues_kset; + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, "tx-%u", index); if (error) @@ -1164,14 +1184,14 @@ exit: #endif /* CONFIG_SYSFS */ int -netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) +netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) { #ifdef CONFIG_SYSFS int i; int error = 0; for (i = old_num; i < new_num; i++) { - error = netdev_queue_add_kobject(net, i); + error = netdev_queue_add_kobject(dev, i); if (error) { new_num = old_num; break; @@ -1179,7 +1199,7 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) } while (--i >= new_num) { - struct netdev_queue *queue = net->_tx + i; + struct netdev_queue *queue = dev->_tx + i; #ifdef CONFIG_BQL sysfs_remove_group(&queue->kobj, &dql_group); @@ -1193,25 +1213,25 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) #endif /* CONFIG_SYSFS */ } -static int register_queue_kobjects(struct net_device *net) +static int register_queue_kobjects(struct net_device *dev) { int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; #ifdef CONFIG_SYSFS - net->queues_kset = kset_create_and_add("queues", - NULL, &net->dev.kobj); - if (!net->queues_kset) + dev->queues_kset = kset_create_and_add("queues", + NULL, &dev->dev.kobj); + if (!dev->queues_kset) return -ENOMEM; - real_rx = net->real_num_rx_queues; + real_rx = dev->real_num_rx_queues; #endif - real_tx = net->real_num_tx_queues; + real_tx = dev->real_num_tx_queues; - error = net_rx_queue_update_kobjects(net, 0, real_rx); + error = net_rx_queue_update_kobjects(dev, 0, real_rx); if (error) goto error; rxq = real_rx; - error = netdev_queue_update_kobjects(net, 0, real_tx); + error = netdev_queue_update_kobjects(dev, 0, real_tx); if (error) goto error; txq = real_tx; @@ -1219,24 +1239,24 @@ static int register_queue_kobjects(struct net_device *net) return 0; error: - netdev_queue_update_kobjects(net, txq, 0); - net_rx_queue_update_kobjects(net, rxq, 0); + netdev_queue_update_kobjects(dev, txq, 0); + net_rx_queue_update_kobjects(dev, rxq, 0); return error; } -static void remove_queue_kobjects(struct net_device *net) +static void remove_queue_kobjects(struct net_device *dev) { int real_rx = 0, real_tx = 0; #ifdef CONFIG_SYSFS - real_rx = net->real_num_rx_queues; + real_rx = dev->real_num_rx_queues; #endif - real_tx = net->real_num_tx_queues; + real_tx = dev->real_num_tx_queues; - net_rx_queue_update_kobjects(net, real_rx, 0); - netdev_queue_update_kobjects(net, real_tx, 0); + net_rx_queue_update_kobjects(dev, real_rx, 0); + netdev_queue_update_kobjects(dev, real_tx, 0); #ifdef CONFIG_SYSFS - kset_unregister(net->queues_kset); + kset_unregister(dev->queues_kset); #endif } @@ -1329,13 +1349,13 @@ static struct class net_class = { /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ -void netdev_unregister_kobject(struct net_device * net) +void netdev_unregister_kobject(struct net_device *ndev) { - struct device *dev = &(net->dev); + struct device *dev = &(ndev->dev); kobject_get(&dev->kobj); - remove_queue_kobjects(net); + remove_queue_kobjects(ndev); pm_runtime_set_memalloc_noio(dev, false); @@ -1343,18 +1363,18 @@ void netdev_unregister_kobject(struct net_device * net) } /* Create sysfs entries for network device. */ -int netdev_register_kobject(struct net_device *net) +int netdev_register_kobject(struct net_device *ndev) { - struct device *dev = &(net->dev); - const struct attribute_group **groups = net->sysfs_groups; + struct device *dev = &(ndev->dev); + const struct attribute_group **groups = ndev->sysfs_groups; int error = 0; device_initialize(dev); dev->class = &net_class; - dev->platform_data = net; + dev->platform_data = ndev; dev->groups = groups; - dev_set_name(dev, "%s", net->name); + dev_set_name(dev, "%s", ndev->name); #ifdef CONFIG_SYSFS /* Allow for a device specific group */ @@ -1364,10 +1384,10 @@ int netdev_register_kobject(struct net_device *net) *groups++ = &netstat_group; #if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211) - if (net->ieee80211_ptr) + if (ndev->ieee80211_ptr) *groups++ = &wireless_group; #if IS_ENABLED(CONFIG_WIRELESS_EXT) - else if (net->wireless_handlers) + else if (ndev->wireless_handlers) *groups++ = &wireless_group; #endif #endif @@ -1377,7 +1397,7 @@ int netdev_register_kobject(struct net_device *net) if (error) return error; - error = register_queue_kobjects(net); + error = register_queue_kobjects(ndev); if (error) { device_del(dev); return error; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 81d3a9a08453..85b62691f4f2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -24,7 +24,7 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -static DEFINE_MUTEX(net_mutex); +DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); @@ -273,7 +273,7 @@ static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; struct net *net, *tmp; - LIST_HEAD(net_kill_list); + struct list_head net_kill_list; LIST_HEAD(net_exit_list); /* Atomically snapshot the list of namespaces to cleanup */ diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 22931e1b99b4..1f2a126f4ffa 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -42,7 +42,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css) static int cgrp_css_online(struct cgroup_subsys_state *css) { struct cgroup_cls_state *cs = css_cls_state(css); - struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); + struct cgroup_cls_state *parent = css_cls_state(css->parent); if (parent) cs->classid = parent->classid; @@ -107,5 +107,5 @@ struct cgroup_subsys net_cls_cgrp_subsys = { .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = cgrp_attach, - .base_cftypes = ss_files, + .legacy_cftypes = ss_files, }; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e33937fb32a0..907fb5e36c02 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -822,7 +822,8 @@ void __netpoll_cleanup(struct netpoll *np) RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); - } + } else + RCU_INIT_POINTER(np->dev->npinfo, NULL); } EXPORT_SYMBOL_GPL(__netpoll_cleanup); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3825f669147b..cbd0a199bf52 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -140,7 +140,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css) static int cgrp_css_online(struct cgroup_subsys_state *css) { - struct cgroup_subsys_state *parent_css = css_parent(css); + struct cgroup_subsys_state *parent_css = css->parent; struct net_device *dev; int ret = 0; @@ -185,15 +185,15 @@ static int read_priomap(struct seq_file *sf, void *v) return 0; } -static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t write_priomap(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { char devname[IFNAMSIZ + 1]; struct net_device *dev; u32 prio; int ret; - if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) + if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) return -EINVAL; dev = dev_get_by_name(&init_net, devname); @@ -202,11 +202,11 @@ static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, rtnl_lock(); - ret = netprio_set_prio(css, dev, prio); + ret = netprio_set_prio(of_css(of), dev, prio); rtnl_unlock(); dev_put(dev); - return ret; + return ret ?: nbytes; } static int update_netprio(const void *v, struct file *file, unsigned n) @@ -239,7 +239,7 @@ static struct cftype ss_files[] = { { .name = "ifpriomap", .seq_show = read_priomap, - .write_string = write_priomap, + .write = write_priomap, }, { } /* terminate */ }; @@ -249,7 +249,7 @@ struct cgroup_subsys net_prio_cgrp_subsys = { .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = net_prio_attach, - .base_cftypes = ss_files, + .legacy_cftypes = ss_files, }; static int netprio_device_event(struct notifier_block *unused, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0304f981f7ff..8b849ddfef2e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -69,8 +69,9 @@ * for running devices in the if_list and sends packets until count is 0 it * also the thread checks the thread->control which is used for inter-process * communication. controlling process "posts" operations to the threads this - * way. The if_lock should be possible to remove when add/rem_device is merged - * into this too. + * way. + * The if_list is RCU protected, and the if_lock remains to protect updating + * of if_list, from "add_device" as it invoked from userspace (via proc write). * * By design there should only be *one* "controlling" process. In practice * multiple write accesses gives unpredictable result. Understood by "write" @@ -208,7 +209,7 @@ #define T_REMDEVALL (1<<2) /* Remove all devs */ #define T_REMDEV (1<<3) /* Remove one dev */ -/* If lock -- can be removed after some work */ +/* If lock -- protects updating of if_list */ #define if_lock(t) spin_lock(&(t->if_lock)); #define if_unlock(t) spin_unlock(&(t->if_lock)); @@ -241,6 +242,7 @@ struct pktgen_dev { struct proc_dir_entry *entry; /* proc file */ struct pktgen_thread *pg_thread;/* the owner */ struct list_head list; /* chaining in the thread's run-queue */ + struct rcu_head rcu; /* freed by RCU */ int running; /* if false, the test will stop */ @@ -573,7 +575,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) is_zero_ether_addr(pkt_dev->src_mac) ? pkt_dev->odev->dev_addr : pkt_dev->src_mac); - seq_printf(seq, "dst_mac: "); + seq_puts(seq, "dst_mac: "); seq_printf(seq, "%pM\n", pkt_dev->dst_mac); seq_printf(seq, @@ -588,7 +590,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->nr_labels) { unsigned int i; - seq_printf(seq, " mpls: "); + seq_puts(seq, " mpls: "); for (i = 0; i < pkt_dev->nr_labels; i++) seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), i == pkt_dev->nr_labels-1 ? "\n" : ", "); @@ -613,67 +615,67 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->node >= 0) seq_printf(seq, " node: %d\n", pkt_dev->node); - seq_printf(seq, " Flags: "); + seq_puts(seq, " Flags: "); if (pkt_dev->flags & F_IPV6) - seq_printf(seq, "IPV6 "); + seq_puts(seq, "IPV6 "); if (pkt_dev->flags & F_IPSRC_RND) - seq_printf(seq, "IPSRC_RND "); + seq_puts(seq, "IPSRC_RND "); if (pkt_dev->flags & F_IPDST_RND) - seq_printf(seq, "IPDST_RND "); + seq_puts(seq, "IPDST_RND "); if (pkt_dev->flags & F_TXSIZE_RND) - seq_printf(seq, "TXSIZE_RND "); + seq_puts(seq, "TXSIZE_RND "); if (pkt_dev->flags & F_UDPSRC_RND) - seq_printf(seq, "UDPSRC_RND "); + seq_puts(seq, "UDPSRC_RND "); if (pkt_dev->flags & F_UDPDST_RND) - seq_printf(seq, "UDPDST_RND "); + seq_puts(seq, "UDPDST_RND "); if (pkt_dev->flags & F_UDPCSUM) - seq_printf(seq, "UDPCSUM "); + seq_puts(seq, "UDPCSUM "); if (pkt_dev->flags & F_MPLS_RND) - seq_printf(seq, "MPLS_RND "); + seq_puts(seq, "MPLS_RND "); if (pkt_dev->flags & F_QUEUE_MAP_RND) - seq_printf(seq, "QUEUE_MAP_RND "); + seq_puts(seq, "QUEUE_MAP_RND "); if (pkt_dev->flags & F_QUEUE_MAP_CPU) - seq_printf(seq, "QUEUE_MAP_CPU "); + seq_puts(seq, "QUEUE_MAP_CPU "); if (pkt_dev->cflows) { if (pkt_dev->flags & F_FLOW_SEQ) - seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ + seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/ else - seq_printf(seq, "FLOW_RND "); + seq_puts(seq, "FLOW_RND "); } #ifdef CONFIG_XFRM if (pkt_dev->flags & F_IPSEC_ON) { - seq_printf(seq, "IPSEC "); + seq_puts(seq, "IPSEC "); if (pkt_dev->spi) seq_printf(seq, "spi:%u", pkt_dev->spi); } #endif if (pkt_dev->flags & F_MACSRC_RND) - seq_printf(seq, "MACSRC_RND "); + seq_puts(seq, "MACSRC_RND "); if (pkt_dev->flags & F_MACDST_RND) - seq_printf(seq, "MACDST_RND "); + seq_puts(seq, "MACDST_RND "); if (pkt_dev->flags & F_VID_RND) - seq_printf(seq, "VID_RND "); + seq_puts(seq, "VID_RND "); if (pkt_dev->flags & F_SVID_RND) - seq_printf(seq, "SVID_RND "); + seq_puts(seq, "SVID_RND "); if (pkt_dev->flags & F_NODE) - seq_printf(seq, "NODE_ALLOC "); + seq_puts(seq, "NODE_ALLOC "); seq_puts(seq, "\n"); @@ -716,7 +718,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->result[0]) seq_printf(seq, "Result: %s\n", pkt_dev->result); else - seq_printf(seq, "Result: Idle\n"); + seq_puts(seq, "Result: Idle\n"); return 0; } @@ -802,7 +804,6 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen) case '\t': case ' ': goto done_str; - break; default: break; } @@ -1735,25 +1736,25 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); - seq_printf(seq, "Running: "); + seq_puts(seq, "Running: "); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) if (pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); - seq_printf(seq, "\nStopped: "); + seq_puts(seq, "\nStopped: "); - list_for_each_entry(pkt_dev, &t->if_list, list) + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) if (!pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); if (t->result[0]) seq_printf(seq, "\nResult: %s\n", t->result); else - seq_printf(seq, "\nResult: NA\n"); + seq_puts(seq, "\nResult: NA\n"); - if_unlock(t); + rcu_read_unlock(); return 0; } @@ -1878,10 +1879,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn, pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { - if_lock(t); pkt_dev->removal_mark = 1; t->control |= T_REMDEV; - if_unlock(t); } break; } @@ -1931,7 +1930,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -1946,6 +1946,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d dev->name); break; } + rcu_read_unlock(); } } @@ -2997,8 +2998,8 @@ static void pktgen_run(struct pktgen_thread *t) func_enter(); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { /* * setup odev and create initial packet. @@ -3007,18 +3008,18 @@ static void pktgen_run(struct pktgen_thread *t) if (pkt_dev->odev) { pktgen_clear_counters(pkt_dev); - pkt_dev->running = 1; /* Cranke yeself! */ pkt_dev->skb = NULL; pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); set_pkt_overhead(pkt_dev); strcpy(pkt_dev->result, "Starting"); + pkt_dev->running = 1; /* Cranke yeself! */ started++; } else strcpy(pkt_dev->result, "Error starting"); } - if_unlock(t); + rcu_read_unlock(); if (started) t->control &= ~(T_STOP); } @@ -3041,27 +3042,25 @@ static int thread_is_running(const struct pktgen_thread *t) { const struct pktgen_dev *pkt_dev; - list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) + if (pkt_dev->running) { + rcu_read_unlock(); return 1; + } + rcu_read_unlock(); return 0; } static int pktgen_wait_thread_run(struct pktgen_thread *t) { - if_lock(t); - while (thread_is_running(t)) { - if_unlock(t); - msleep_interruptible(100); if (signal_pending(current)) goto signal; - if_lock(t); } - if_unlock(t); return 1; signal: return 0; @@ -3166,10 +3165,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) return -EINVAL; } + pkt_dev->running = 0; kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; pkt_dev->stopped_at = ktime_get(); - pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3180,9 +3179,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) { struct pktgen_dev *pkt_dev, *best = NULL; - if_lock(t); - - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { if (!pkt_dev->running) continue; if (best == NULL) @@ -3190,7 +3188,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) best = pkt_dev; } - if_unlock(t); + rcu_read_unlock(); + return best; } @@ -3200,13 +3199,13 @@ static void pktgen_stop(struct pktgen_thread *t) func_enter(); - if_lock(t); + rcu_read_lock(); - list_for_each_entry(pkt_dev, &t->if_list, list) { + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); } - if_unlock(t); + rcu_read_unlock(); } /* @@ -3220,8 +3219,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) func_enter(); - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3235,8 +3232,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) break; } - - if_unlock(t); } static void pktgen_rem_all_ifs(struct pktgen_thread *t) @@ -3248,8 +3243,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) /* Remove all devices, free mem */ - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3258,8 +3251,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) pktgen_remove_device(t, cur); } - - if_unlock(t); } static void pktgen_rem_thread(struct pktgen_thread *t) @@ -3407,10 +3398,10 @@ static int pktgen_thread_worker(void *arg) pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); - set_current_state(TASK_INTERRUPTIBLE); - set_freezable(); + __set_current_state(TASK_RUNNING); + while (!kthread_should_stop()) { pkt_dev = next_to_run(t); @@ -3424,8 +3415,6 @@ static int pktgen_thread_worker(void *arg) continue; } - __set_current_state(TASK_RUNNING); - if (likely(pkt_dev)) { pktgen_xmit(pkt_dev); @@ -3456,9 +3445,8 @@ static int pktgen_thread_worker(void *arg) } try_to_freeze(); - - set_current_state(TASK_INTERRUPTIBLE); } + set_current_state(TASK_INTERRUPTIBLE); pr_debug("%s stopping all device\n", t->tsk->comm); pktgen_stop(t); @@ -3485,8 +3473,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, struct pktgen_dev *p, *pkt_dev = NULL; size_t len = strlen(ifname); - if_lock(t); - list_for_each_entry(p, &t->if_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(p, &t->if_list, list) if (strncmp(p->odevname, ifname, len) == 0) { if (p->odevname[len]) { if (exact || p->odevname[len] != '@') @@ -3496,7 +3484,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, break; } - if_unlock(t); + rcu_read_unlock(); pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev); return pkt_dev; } @@ -3510,6 +3498,12 @@ static int add_dev_to_thread(struct pktgen_thread *t, { int rv = 0; + /* This function cannot be called concurrently, as its called + * under pktgen_thread_lock mutex, but it can run from + * userspace on another CPU than the kthread. The if_lock() + * is used here to sync with concurrent instances of + * _rem_dev_from_if_list() invoked via kthread, which is also + * updating the if_list */ if_lock(t); if (pkt_dev->pg_thread) { @@ -3518,9 +3512,9 @@ static int add_dev_to_thread(struct pktgen_thread *t, goto out; } - list_add(&pkt_dev->list, &t->if_list); - pkt_dev->pg_thread = t; pkt_dev->running = 0; + pkt_dev->pg_thread = t; + list_add_rcu(&pkt_dev->list, &t->if_list); out: if_unlock(t); @@ -3675,11 +3669,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t, struct list_head *q, *n; struct pktgen_dev *p; + if_lock(t); list_for_each_safe(q, n, &t->if_list) { p = list_entry(q, struct pktgen_dev, list); if (p == pkt_dev) - list_del(&p->list); + list_del_rcu(&p->list); } + if_unlock(t); } static int pktgen_remove_device(struct pktgen_thread *t, @@ -3699,20 +3695,22 @@ static int pktgen_remove_device(struct pktgen_thread *t, pkt_dev->odev = NULL; } - /* And update the thread if_list */ - - _rem_dev_from_if_list(t, pkt_dev); - + /* Remove proc before if_list entry, because add_device uses + * list to determine if interface already exist, avoid race + * with proc_create_data() */ if (pkt_dev->entry) proc_remove(pkt_dev->entry); + /* And update the thread if_list */ + _rem_dev_from_if_list(t, pkt_dev); + #ifdef CONFIG_XFRM free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); if (pkt_dev->page) put_page(pkt_dev->page); - kfree(pkt_dev); + kfree_rcu(pkt_dev, rcu); return 0; } @@ -3812,6 +3810,7 @@ static void __exit pg_cleanup(void) { unregister_netdevice_notifier(&pktgen_notifier_block); unregister_pernet_subsys(&pg_net_ops); + /* Don't need rcu_barrier() due to use of kfree_rcu() */ } module_init(pg_init); diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index eaba0f68f860..4eab4a94a59d 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -52,14 +52,43 @@ * test_8021q: * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ? * ldh [16] ; load inner type - * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * jneq #0x88f7, test_8021q_ipv4 ; ETH_P_1588 ? * ldb [18] ; load payload * and #0x8 ; as we don't have ports here, test * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x40 ; PTP_CLASS_V2_VLAN + * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * ret a ; return PTP class + * + * ; PTP over UDP over IPv4 over 802.1Q over Ethernet + * test_8021q_ipv4: + * jneq #0x800, test_8021q_ipv6 ; ETH_P_IP ? + * ldb [27] ; load proto + * jneq #17, drop_8021q_ipv4 ; IPPROTO_UDP ? + * ldh [24] ; load frag offset field + * jset #0x1fff, drop_8021q_ipv4; don't allow fragments + * ldxb 4*([18]&0xf) ; load IP header len + * ldh [x + 20] ; load UDP dst port + * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? + * ldh [x + 26] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * ret a ; return PTP class + * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over UDP over IPv6 over 802.1Q over Ethernet + * test_8021q_ipv6: + * jneq #0x86dd, drop_8021q_ipv6 ; ETH_P_IPV6 ? + * ldb [24] ; load proto + * jneq #17, drop_8021q_ipv6 ; IPPROTO_UDP ? + * ldh [60] ; load UDP dst port + * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? + * ldh [66] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class + * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * * ; PTP over Ethernet * test_ieee1588: @@ -78,17 +107,17 @@ #include <linux/filter.h> #include <linux/ptp_classify.h> -static struct sk_filter *ptp_insns __read_mostly; +static struct bpf_prog *ptp_insns __read_mostly; unsigned int ptp_classify_raw(const struct sk_buff *skb) { - return SK_RUN_FILTER(ptp_insns, skb); + return BPF_PROG_RUN(ptp_insns, skb); } EXPORT_SYMBOL_GPL(ptp_classify_raw); void __init ptp_classifier_init(void) { - static struct sock_filter ptp_filter[] = { + static struct sock_filter ptp_filter[] __initdata = { { 0x28, 0, 0, 0x0000000c }, { 0x15, 0, 12, 0x00000800 }, { 0x30, 0, 0, 0x00000017 }, @@ -113,16 +142,39 @@ void __init ptp_classifier_init(void) { 0x44, 0, 0, 0x00000020 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, - { 0x15, 0, 9, 0x00008100 }, + { 0x15, 0, 32, 0x00008100 }, { 0x28, 0, 0, 0x00000010 }, - { 0x15, 0, 15, 0x000088f7 }, + { 0x15, 0, 7, 0x000088f7 }, { 0x30, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x00000008 }, - { 0x15, 0, 12, 0x00000000 }, + { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000040 }, + { 0x44, 0, 0, 0x00000070 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x15, 0, 12, 0x00000800 }, + { 0x30, 0, 0, 0x0000001b }, + { 0x15, 0, 9, 0x00000011 }, + { 0x28, 0, 0, 0x00000018 }, + { 0x45, 7, 0, 0x00001fff }, + { 0xb1, 0, 0, 0x00000012 }, + { 0x48, 0, 0, 0x00000014 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x48, 0, 0, 0x0000001a }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000050 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 8, 0x000086dd }, + { 0x30, 0, 0, 0x00000018 }, + { 0x15, 0, 6, 0x00000011 }, + { 0x28, 0, 0, 0x0000003c }, + { 0x15, 0, 4, 0x0000013f }, + { 0x28, 0, 0, 0x00000042 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000060 }, { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, { 0x30, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x00000008 }, @@ -133,9 +185,9 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog ptp_prog = { + struct sock_fprog_kern ptp_prog = { .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, }; - BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); + BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 467f326126e0..04db318e6218 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -41,27 +41,27 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, unsigned int nr_table_entries) { size_t lopt_size = sizeof(struct listen_sock); - struct listen_sock *lopt; + struct listen_sock *lopt = NULL; nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); nr_table_entries = max_t(u32, nr_table_entries, 8); nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); lopt_size += nr_table_entries * sizeof(struct request_sock *); - if (lopt_size > PAGE_SIZE) + + if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + lopt = kzalloc(lopt_size, GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); + if (!lopt) lopt = vzalloc(lopt_size); - else - lopt = kzalloc(lopt_size, GFP_KERNEL); - if (lopt == NULL) + if (!lopt) return -ENOMEM; - for (lopt->max_qlen_log = 3; - (1 << lopt->max_qlen_log) < nr_table_entries; - lopt->max_qlen_log++); - get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; + lopt->max_qlen_log = ilog2(nr_table_entries); write_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; @@ -72,22 +72,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, void __reqsk_queue_destroy(struct request_sock_queue *queue) { - struct listen_sock *lopt; - size_t lopt_size; - - /* - * this is an error recovery path only - * no locking needed and the lopt is not NULL - */ - - lopt = queue->listen_opt; - lopt_size = sizeof(struct listen_sock) + - lopt->nr_table_entries * sizeof(struct request_sock *); - - if (lopt_size > PAGE_SIZE) - vfree(lopt); - else - kfree(lopt); + /* This is an error recovery path only, no locking needed */ + kvfree(queue->listen_opt); } static inline struct listen_sock *reqsk_queue_yank_listen_sk( @@ -107,8 +93,6 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) { /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - size_t lopt_size = sizeof(struct listen_sock) + - lopt->nr_table_entries * sizeof(struct request_sock *); if (lopt->qlen != 0) { unsigned int i; @@ -125,10 +109,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } WARN_ON(lopt->qlen != 0); - if (lopt_size > PAGE_SIZE) - vfree(lopt); - else - kfree(lopt); + kvfree(lopt); } /* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d4ff41739b0f..8d39071f32d7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -299,7 +299,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops) if (rtnl_link_ops_get(ops->kind)) return -EEXIST; - if (!ops->dellink) + /* The check for setup is here because if ops + * does not have that filled up, it is not possible + * to use the ops for creating device. So do not + * fill up dellink as well. That disables rtnl_dellink. + */ + if (ops->setup && !ops->dellink) ops->dellink = unregister_netdevice_queue; list_add_tail(&ops->list, &link_ops); @@ -353,15 +358,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); +/* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace. + */ +static void rtnl_lock_unregistering_all(void) +{ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + for_each_net(net) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + /** * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. * @ops: struct rtnl_link_ops * to unregister */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { - rtnl_lock(); + /* Close the race with cleanup_net() */ + mutex_lock(&net_mutex); + rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); + mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); @@ -767,14 +803,15 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + - nla_total_size(sizeof(struct ifla_vf_tx_rate)) + - nla_total_size(sizeof(struct ifla_vf_spoofchk))); + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + + nla_total_size(sizeof(struct ifla_vf_rate))); return size; } else return 0; } -static size_t rtnl_port_size(const struct net_device *dev) +static size_t rtnl_port_size(const struct net_device *dev, + u32 ext_filter_mask) { size_t port_size = nla_total_size(4) /* PORT_VF */ + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ @@ -790,7 +827,8 @@ static size_t rtnl_port_size(const struct net_device *dev) size_t port_self_size = nla_total_size(sizeof(struct nlattr)) + port_size; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; if (dev_num_vf(dev->dev.parent)) return port_self_size + vf_ports_size + @@ -826,7 +864,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ - + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */ @@ -888,11 +926,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) return 0; } -static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) +static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev, + u32 ext_filter_mask) { int err; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; err = rtnl_port_self_fill(skb, dev); @@ -1030,6 +1070,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_info ivi; struct ifla_vf_mac vf_mac; struct ifla_vf_vlan vf_vlan; + struct ifla_vf_rate vf_rate; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; @@ -1050,6 +1091,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, break; vf_mac.vf = vf_vlan.vf = + vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = ivi.vf; @@ -1057,7 +1099,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; - vf_tx_rate.rate = ivi.tx_rate; + vf_tx_rate.rate = ivi.max_tx_rate; + vf_rate.min_tx_rate = ivi.min_tx_rate; + vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); @@ -1067,6 +1111,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || + nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), + &vf_rate) || nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), @@ -1079,7 +1125,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, vfinfo); } - if (rtnl_port_fill(skb, dev)) + if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) { @@ -1173,6 +1219,10 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_rate) }, + [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_link_state) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1198,6 +1248,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_head *head; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; + int err; + int hdrlen; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1205,8 +1257,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { + /* A hack to preserve kernel<->userspace interface. + * The correct header is ifinfomsg. It is consistent with rtnl_getlink. + * However, before Linux v3.9 the code here assumed rtgenmsg and that's + * what iproute2 < v3.9.0 used. + * We can detect the old iproute2. Even including the IFLA_EXT_MASK + * attribute, its netlink message is shorter than struct ifinfomsg. + */ + hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ? + sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); + + if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1218,11 +1279,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI, - ext_filter_mask) <= 0) + err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI, + ext_filter_mask); + /* If we ran out of room on the first message, + * we're in trouble + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err <= 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1325,11 +1392,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) } case IFLA_VF_TX_RATE: { struct ifla_vf_tx_rate *ivt; + struct ifla_vf_info ivf; + ivt = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, + &ivf); + if (err) + break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + break; + } + case IFLA_VF_RATE: { + struct ifla_vf_rate *ivt; ivt = nla_data(vf); err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, - ivt->rate); + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); break; } case IFLA_VF_SPOOFCHK: { @@ -1395,7 +1480,8 @@ static int do_set_master(struct net_device *dev, int ifindex) return 0; } -static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, +static int do_setlink(const struct sk_buff *skb, + struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1407,7 +1493,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = PTR_ERR(net); goto errout; } - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { err = -EPERM; goto errout; } @@ -1661,7 +1747,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - err = do_setlink(dev, ifm, tb, ifname, 0); + err = do_setlink(skb, dev, ifm, tb, ifname, 0); errout: return err; } @@ -1696,12 +1782,11 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) return -ENODEV; ops = dev->rtnl_link_ops; - if (!ops) + if (!ops || !ops->dellink) return -EOPNOTSUPP; ops->dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); - list_del(&list_kill); return 0; } @@ -1725,7 +1810,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) EXPORT_SYMBOL(rtnl_configure_link); struct net_device *rtnl_create_link(struct net *net, - char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) + char *ifname, unsigned char name_assign_type, + const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; struct net_device *dev; @@ -1743,8 +1829,8 @@ struct net_device *rtnl_create_link(struct net *net, num_rx_queues = ops->get_num_rx_queues(); err = -ENOMEM; - dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup, - num_tx_queues, num_rx_queues); + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, + ops->setup, num_tx_queues, num_rx_queues); if (!dev) goto err; @@ -1778,7 +1864,8 @@ err: } EXPORT_SYMBOL(rtnl_create_link); -static int rtnl_group_changelink(struct net *net, int group, +static int rtnl_group_changelink(const struct sk_buff *skb, + struct net *net, int group, struct ifinfomsg *ifm, struct nlattr **tb) { @@ -1787,7 +1874,7 @@ static int rtnl_group_changelink(struct net *net, int group, for_each_netdev(net, dev) { if (dev->group == group) { - err = do_setlink(dev, ifm, tb, NULL, 0); + err = do_setlink(skb, dev, ifm, tb, NULL, 0); if (err < 0) return err; } @@ -1808,6 +1895,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct nlattr *linkinfo[IFLA_INFO_MAX+1]; + unsigned char name_assign_type = NET_NAME_USER; int err; #ifdef CONFIG_MODULES @@ -1929,12 +2017,12 @@ replay: modified = 1; } - return do_setlink(dev, ifm, tb, ifname, modified); + return do_setlink(skb, dev, ifm, tb, ifname, modified); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) - return rtnl_group_changelink(net, + return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), ifm, tb); return -ENODEV; @@ -1957,14 +2045,19 @@ replay: return -EOPNOTSUPP; } - if (!ifname[0]) + if (!ops->setup) + return -EOPNOTSUPP; + + if (!ifname[0]) { snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + name_assign_type = NET_NAME_ENUM; + } dest_net = rtnl_link_get_net(net, tb); if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - dev = rtnl_create_link(dest_net, ifname, ops, tb); + dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -1975,11 +2068,15 @@ replay: if (ops->newlink) { err = ops->newlink(net, dev, tb, data); /* Drivers should call free_netdev() in ->destructor - * and unregister it on failure so that device could be - * finally freed in rtnl_unlock. + * and unregister it on failure after registration + * so that device could be finally freed in rtnl_unlock. */ - if (err < 0) + if (err < 0) { + /* If device is not registered at all, free it now */ + if (dev->reg_state == NETREG_UNINITIALIZED) + free_netdev(dev); goto out; + } } else { err = register_netdevice(dev); if (err < 0) { @@ -2051,9 +2148,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; + int hdrlen; + + /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ + hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? + sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { + if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } @@ -2291,22 +2392,20 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, struct net_device *dev, const unsigned char *addr) { - int err = -EOPNOTSUPP; + int err = -EINVAL; /* If aging addresses are supported device will need to * implement its own handler for this. */ if (!(ndm->ndm_state & NUD_PERMANENT)) { pr_info("%s: FDB only supports static addresses\n", dev->name); - return -EINVAL; + return err; } if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) err = dev_uc_del(dev, addr); else if (is_multicast_ether_addr(addr)) err = dev_mc_del(dev, addr); - else - err = -EINVAL; return err; } @@ -2321,7 +2420,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) int err = -EINVAL; __u8 *addr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); @@ -2420,6 +2519,7 @@ skip: int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { int err; @@ -2437,28 +2537,72 @@ EXPORT_SYMBOL(ndo_dflt_fdb_dump); static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; - struct net *net = sock_net(skb->sk); struct net_device *dev; + struct nlattr *tb[IFLA_MAX+1]; + struct net_device *bdev = NULL; + struct net_device *br_dev = NULL; + const struct net_device_ops *ops = NULL; + const struct net_device_ops *cops = NULL; + struct ifinfomsg *ifm = nlmsg_data(cb->nlh); + struct net *net = sock_net(skb->sk); + int brport_idx = 0; + int br_idx = 0; + int idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->priv_flags & IFF_BRIDGE_PORT) { - struct net_device *br_dev; - const struct net_device_ops *ops; + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, + ifla_policy) == 0) { + if (tb[IFLA_MASTER]) + br_idx = nla_get_u32(tb[IFLA_MASTER]); + } + + brport_idx = ifm->ifi_index; + + if (br_idx) { + br_dev = __dev_get_by_index(net, br_idx); + if (!br_dev) + return -ENODEV; - br_dev = netdev_master_upper_dev_get(dev); - ops = br_dev->netdev_ops; - if (ops->ndo_fdb_dump) - idx = ops->ndo_fdb_dump(skb, cb, dev, idx); + ops = br_dev->netdev_ops; + bdev = br_dev; + } + + for_each_netdev(net, dev) { + if (brport_idx && (dev->ifindex != brport_idx)) + continue; + + if (!br_idx) { /* user did not specify a specific bridge */ + if (dev->priv_flags & IFF_BRIDGE_PORT) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + + bdev = dev; + } else { + if (dev != br_dev && + !(dev->priv_flags & IFF_BRIDGE_PORT)) + continue; + + if (br_dev != netdev_master_upper_dev_get(dev) && + !(dev->priv_flags & IFF_EBRIDGE)) + continue; + + bdev = br_dev; + cops = ops; } + if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (cops && cops->ndo_fdb_dump) + idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, + idx); + } + + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); - else - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, + idx); + + cops = NULL; } - rcu_read_unlock(); cb->args[0] = idx; return skb->len; @@ -2773,7 +2917,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sz_idx = type>>2; kind = type&3; - if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 897da56f3aff..ba71212f0251 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -__u32 secure_ip_id(__be32 daddr) -{ - u32 hash[MD5_DIGEST_WORDS]; - - net_secret_init(); - hash[0] = (__force __u32) daddr; - hash[1] = net_secret[13]; - hash[2] = net_secret[14]; - hash[3] = net_secret[15]; - - md5_transform(hash, net_secret); - - return hash[0]; -} - -__u32 secure_ipv6_id(const __be32 daddr[4]) -{ - __u32 hash[4]; - - net_secret_init(); - memcpy(hash, daddr, 16); - md5_transform(hash, net_secret); - - return hash[0]; -} __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1b62343f5837..224506a6fa80 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -689,12 +689,15 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ooo_okay = old->ooo_okay; new->no_fcs = old->no_fcs; new->encapsulation = old->encapsulation; + new->encap_hdr_csum = old->encap_hdr_csum; + new->csum_valid = old->csum_valid; + new->csum_complete_sw = old->csum_complete_sw; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif memcpy(new->cb, old->cb, sizeof(old->cb)); new->csum = old->csum; - new->local_df = old->local_df; + new->ignore_df = old->ignore_df; new->pkt_type = old->pkt_type; new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); @@ -951,10 +954,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) EXPORT_SYMBOL(skb_copy); /** - * __pskb_copy - create copy of an sk_buff with private head. + * __pskb_copy_fclone - create copy of an sk_buff with private head. * @skb: buffer to copy * @headroom: headroom of new skb * @gfp_mask: allocation priority + * @fclone: if true allocate the copy of the skb from the fclone + * cache instead of the head cache; it is recommended to set this + * to true for the cases where the copy will likely be cloned * * Make a copy of both an &sk_buff and part of its data, located * in header. Fragmented data remain shared. This is used when @@ -964,11 +970,12 @@ EXPORT_SYMBOL(skb_copy); * The returned buffer has a reference count of 1. */ -struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) +struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, + gfp_t gfp_mask, bool fclone) { unsigned int size = skb_headlen(skb) + headroom; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0); + struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE); if (!n) goto out; @@ -1008,7 +1015,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) out: return n; } -EXPORT_SYMBOL(__pskb_copy); +EXPORT_SYMBOL(__pskb_copy_fclone); /** * pskb_expand_head - reallocate header of &sk_buff @@ -2881,12 +2888,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int pos; int dummy; + __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) return ERR_PTR(-EINVAL); - csum = !!can_checksum_protocol(features, proto); - __skb_push(head_skb, doffset); + csum = !head_skb->encap_hdr_csum && + !!can_checksum_protocol(features, proto); + headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -2967,9 +2976,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, tail = nskb; __copy_skb_header(nskb, head_skb); - nskb->mac_len = head_skb->mac_len; skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); + skb_reset_mac_len(nskb); skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, @@ -2983,6 +2992,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + doffset; continue; } @@ -3052,6 +3063,8 @@ perform_csum_check: nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); @@ -3076,7 +3089,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (unlikely(p->len + len >= 65536)) return -E2BIG; - lp = NAPI_GRO_CB(p)->last ?: p; + lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); if (headlen <= offset) { @@ -3192,7 +3205,7 @@ merge: __skb_pull(skb, offset); - if (!NAPI_GRO_CB(p)->last) + if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; @@ -3477,10 +3490,10 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sock_queue_err_skb); -void skb_tstamp_tx(struct sk_buff *orig_skb, - struct skb_shared_hwtstamps *hwtstamps) +void __skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps, + struct sock *sk, int tstype) { - struct sock *sk = orig_skb->sk; struct sock_exterr_skb *serr; struct sk_buff *skb; int err; @@ -3508,12 +3521,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = tstype; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { + serr->ee.ee_data = skb_shinfo(skb)->tskey; + if (sk->sk_protocol == IPPROTO_TCP) + serr->ee.ee_data -= sk->sk_tskey; + } err = sock_queue_err_skb(sk, skb); if (err) kfree_skb(skb); } +EXPORT_SYMBOL_GPL(__skb_tstamp_tx); + +void skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps) +{ + return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk, + SCM_TSTAMP_SND); +} EXPORT_SYMBOL_GPL(skb_tstamp_tx); void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) @@ -3913,7 +3940,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; - skb->local_df = 0; + skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); diff --git a/net/core/sock.c b/net/core/sock.c index b4fff008136f..2714811afbd8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -145,6 +145,55 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +/** + * sk_ns_capable - General socket capability test + * @sk: Socket to use a capability on or through + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in the user + * namespace @user_ns. + */ +bool sk_ns_capable(const struct sock *sk, + struct user_namespace *user_ns, int cap) +{ + return file_ns_capable(sk->sk_socket->file, user_ns, cap) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(sk_ns_capable); + +/** + * sk_capable - Socket global capability test + * @sk: Socket to use a capability on or through + * @cap: The global capbility to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in all user + * namespaces. + */ +bool sk_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, &init_user_ns, cap); +} +EXPORT_SYMBOL(sk_capable); + +/** + * sk_net_capable - Network namespace socket capability test + * @sk: Socket to use a capability on or through + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socke was created + * and the current process has the capability @cap over the network namespace + * the socket is a member of. + */ +bool sk_net_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); +} +EXPORT_SYMBOL(sk_net_capable); + + #ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { @@ -442,7 +491,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) skb->dev = NULL; - if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { + if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } @@ -735,7 +784,7 @@ set_rcvbuf: break; case SO_NO_CHECK: - sk->sk_no_check = valbool; + sk->sk_no_check_tx = valbool; break; case SO_PRIORITY: @@ -799,24 +848,25 @@ set_rcvbuf: ret = -EINVAL; break; } - sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, - val & SOF_TIMESTAMPING_TX_HARDWARE); - sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE, - val & SOF_TIMESTAMPING_TX_SOFTWARE); - sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE, - val & SOF_TIMESTAMPING_RX_HARDWARE); + if (val & SOF_TIMESTAMPING_OPT_ID && + !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { + if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_state != TCP_ESTABLISHED) { + ret = -EINVAL; + break; + } + sk->sk_tskey = tcp_sk(sk)->snd_una; + } else { + sk->sk_tskey = 0; + } + } + sk->sk_tsflags = val; if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); else sock_disable_timestamp(sk, (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); - sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, - val & SOF_TIMESTAMPING_SOFTWARE); - sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, - val & SOF_TIMESTAMPING_SYS_HARDWARE); - sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE, - val & SOF_TIMESTAMPING_RAW_HARDWARE); break; case SO_RCVLOWAT: @@ -1015,7 +1065,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_NO_CHECK: - v.val = sk->sk_no_check; + v.val = sk->sk_no_check_tx; break; case SO_PRIORITY: @@ -1042,21 +1092,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_TIMESTAMPING: - v.val = 0; - if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) - v.val |= SOF_TIMESTAMPING_TX_HARDWARE; - if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) - v.val |= SOF_TIMESTAMPING_TX_SOFTWARE; - if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE)) - v.val |= SOF_TIMESTAMPING_RX_HARDWARE; - if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) - v.val |= SOF_TIMESTAMPING_RX_SOFTWARE; - if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) - v.val |= SOF_TIMESTAMPING_SOFTWARE; - if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)) - v.val |= SOF_TIMESTAMPING_SYS_HARDWARE; - if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) - v.val |= SOF_TIMESTAMPING_RAW_HARDWARE; + v.val = sk->sk_tsflags; break; case SO_RCVTIMEO: @@ -1429,6 +1465,7 @@ static void sk_update_clone(const struct sock *sk, struct sock *newsk) struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { struct sock *newsk; + bool is_charged = true; newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); if (newsk != NULL) { @@ -1473,9 +1510,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) filter = rcu_dereference_protected(newsk->sk_filter, 1); if (filter != NULL) - sk_filter_charge(newsk, filter); + /* though it's an empty new sock, the charging may fail + * if sysctl_optmem_max was changed between creation of + * original socket and cloning + */ + is_charged = sk_filter_charge(newsk, filter); - if (unlikely(xfrm_sk_clone_policy(newsk))) { + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) { /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index d7af18859322..ad704c757bb4 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); -int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, +int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype) { struct sock_fprog_kern *fprog; @@ -58,7 +58,7 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, unsigned int flen; int err = 0; - if (!ns_capable(user_ns, CAP_NET_ADMIN)) { + if (!may_report_filterinfo) { nla_reserve(skb, attrtype, 0); return 0; } @@ -68,8 +68,8 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, if (!filter) goto out; - fprog = filter->orig_prog; - flen = sk_filter_proglen(fprog); + fprog = filter->prog->orig_prog; + flen = bpf_classic_proglen(fprog); attr = nla_reserve(skb, attrtype, flen); if (attr == NULL) { diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 6521dfd8b7c8..a8770391ea5b 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -43,31 +43,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) return; type = classify(skb); + if (type == PTP_CLASS_NONE) + return; + + phydev = skb->dev->phydev; + if (likely(phydev->drv->txtstamp)) { + if (!atomic_inc_not_zero(&sk->sk_refcnt)) + return; - switch (type) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV4: - case PTP_CLASS_V2_IPV6: - case PTP_CLASS_V2_L2: - case PTP_CLASS_V2_VLAN: - phydev = skb->dev->phydev; - if (likely(phydev->drv->txtstamp)) { - if (!atomic_inc_not_zero(&sk->sk_refcnt)) - return; - - clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) { - sock_put(sk); - return; - } - - clone->sk = sk; - phydev->drv->txtstamp(phydev, clone, type); + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + sock_put(sk); + return; } - break; - default: - break; + + clone->sk = sk; + phydev->drv->txtstamp(phydev, clone, type); } } EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); @@ -114,20 +105,12 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) __skb_pull(skb, ETH_HLEN); - switch (type) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV4: - case PTP_CLASS_V2_IPV6: - case PTP_CLASS_V2_L2: - case PTP_CLASS_V2_VLAN: - phydev = skb->dev->phydev; - if (likely(phydev->drv->rxtstamp)) - return phydev->drv->rxtstamp(phydev, skb, type); - break; - default: - break; - } + if (type == PTP_CLASS_NONE) + return false; + + phydev = skb->dev->phydev; + if (likely(phydev->drv->rxtstamp)) + return phydev->drv->rxtstamp(phydev, skb, type); return false; } diff --git a/net/core/tso.c b/net/core/tso.c new file mode 100644 index 000000000000..8c3203c585b0 --- /dev/null +++ b/net/core/tso.c @@ -0,0 +1,77 @@ +#include <linux/export.h> +#include <net/ip.h> +#include <net/tso.h> + +/* Calculate expected number of TX descriptors */ +int tso_count_descs(struct sk_buff *skb) +{ + /* The Marvell Way */ + return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags; +} +EXPORT_SYMBOL(tso_count_descs); + +void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, + int size, bool is_last) +{ + struct iphdr *iph; + struct tcphdr *tcph; + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int mac_hdr_len = skb_network_offset(skb); + + memcpy(hdr, skb->data, hdr_len); + iph = (struct iphdr *)(hdr + mac_hdr_len); + iph->id = htons(tso->ip_id); + iph->tot_len = htons(size + hdr_len - mac_hdr_len); + tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); + tcph->seq = htonl(tso->tcp_seq); + tso->ip_id++; + + if (!is_last) { + /* Clear all special flags for not last packet */ + tcph->psh = 0; + tcph->fin = 0; + tcph->rst = 0; + } +} +EXPORT_SYMBOL(tso_build_hdr); + +void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size) +{ + tso->tcp_seq += size; + tso->size -= size; + tso->data += size; + + if ((tso->size == 0) && + (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; + + /* Move to next segment */ + tso->size = frag->size; + tso->data = page_address(frag->page.p) + frag->page_offset; + tso->next_frag_idx++; + } +} +EXPORT_SYMBOL(tso_build_data); + +void tso_start(struct sk_buff *skb, struct tso_t *tso) +{ + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + + tso->ip_id = ntohs(ip_hdr(skb)->id); + tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); + tso->next_frag_idx = 0; + + /* Build first data */ + tso->size = skb_headlen(skb) - hdr_len; + tso->data = skb->data + hdr_len; + if ((tso->size == 0) && + (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; + + /* Move to next segment */ + tso->size = frag->size; + tso->data = page_address(frag->page.p) + frag->page_offset; + tso->next_frag_idx++; + } +} +EXPORT_SYMBOL(tso_start); diff --git a/net/core/utils.c b/net/core/utils.c index 2f737bf90b3f..eed34338736c 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,8 +348,8 @@ static void __net_random_once_deferred(struct work_struct *w) { struct __net_random_once_work *work = container_of(w, struct __net_random_once_work, work); - if (!static_key_enabled(work->key)) - static_key_slow_inc(work->key); + BUG_ON(!static_key_enabled(work->key)); + static_key_slow_dec(work->key); kfree(work); } @@ -367,7 +367,7 @@ static void __net_random_once_disable_jump(struct static_key *key) } bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *done_key) + struct static_key *once_key) { static DEFINE_SPINLOCK(lock); unsigned long flags; @@ -382,7 +382,7 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done, *done = true; spin_unlock_irqrestore(&lock, flags); - __net_random_once_disable_jump(done_key); + __net_random_once_disable_jump(once_key); return true; } diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 553644402670..ca11d283bbeb 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -471,7 +471,11 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh, id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]); if (netdev->dcbnl_ops->getapp) { - up = netdev->dcbnl_ops->getapp(netdev, idtype, id); + ret = netdev->dcbnl_ops->getapp(netdev, idtype, id); + if (ret < 0) + return ret; + else + up = ret; } else { struct dcb_app app = { .selector = idtype, @@ -538,6 +542,8 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh, if (netdev->dcbnl_ops->setapp) { ret = netdev->dcbnl_ops->setapp(netdev, idtype, id, up); + if (ret < 0) + return ret; } else { struct dcb_app app; app.selector = idtype; @@ -1669,7 +1675,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlmsghdr *reply_nlh = NULL; const struct reply_func *fn; - if ((nlh->nlmsg_type == RTM_SETDCB) && !capable(CAP_NET_ADMIN)) + if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, @@ -1770,7 +1776,7 @@ EXPORT_SYMBOL(dcb_getapp); * * Priority 0 is an invalid priority in CEE spec. This routine * removes applications from the app list if the priority is - * set to zero. + * set to zero. Priority is expected to be 8-bit 802.1p user priority bitmap */ int dcb_setapp(struct net_device *dev, struct dcb_app *new) { @@ -1831,7 +1837,8 @@ EXPORT_SYMBOL(dcb_ieee_getapp_mask); * * This adds Application data to the list. Multiple application * entries may exists for the same selector and protocol as long - * as the priorities are different. + * as the priorities are different. Priority is expected to be a + * 3-bit unsigned integer */ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 22b5d818b200..6ca645c4b48e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1024,7 +1024,6 @@ static struct inet_protosw dccp_v4_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v4_prot, .ops = &inet_dccp_ops, - .no_check = 0, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4db3c2a1679c..04cb17d4b0ce 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet6_reqsk_alloc(&dccp6_request_sock_ops); + req = inet_reqsk_alloc(&dccp6_request_sock_ops); if (req == NULL) goto drop; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index c69eb9c4fbb8..b50dc436db1f 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -55,11 +55,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - tw->tw_ipv6only = np->ipv6only; + tw->tw_ipv6only = sk->sk_ipv6only; } #endif /* Linkage updates. */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index eb892b4f4814..de2c1e719305 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1084,14 +1084,15 @@ EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int dccp_mib_init(void) { - return snmp_mib_init((void __percpu **)dccp_statistics, - sizeof(struct dccp_mib), - __alignof__(struct dccp_mib)); + dccp_statistics = alloc_percpu(struct dccp_mib); + if (!dccp_statistics) + return -ENOMEM; + return 0; } static inline void dccp_mib_exit(void) { - snmp_mib_free((void __percpu **)dccp_statistics); + free_percpu(dccp_statistics); } static int thash_entries; diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 607ab71b5a0c..53731e45403c 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -20,6 +20,7 @@ /* Boundary values */ static int zero = 0, + one = 1, u8_max = 0xFF; static unsigned long seqw_min = DCCPF_SEQ_WMIN, seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */ @@ -58,7 +59,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_request_retries), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = &one, .extra2 = &u8_max, }, { diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 16f0b223102e..1cd46a345cb0 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -280,7 +280,7 @@ static ktime_t dccp_timestamp_seed; */ u32 dccp_timestamp(void) { - s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); + u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); do_div(delta, 10); return delta; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 4c04848953bd..ae011b46c071 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -481,7 +481,7 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf sk->sk_backlog_rcv = dn_nsp_backlog_rcv; sk->sk_destruct = dn_destruct; - sk->sk_no_check = 1; + sk->sk_no_check_tx = 1; sk->sk_family = PF_DECnet; sk->sk_protocol = 0; sk->sk_allocation = gfp; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index a603823a3e27..3b726f31c64c 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -574,7 +574,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct dn_ifaddr __rcu **ifap; int err = -EINVAL; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) @@ -618,7 +618,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct dn_ifaddr *ifa; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 57dc159245ec..d332aefb0846 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -505,7 +505,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *attrs[RTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) @@ -530,7 +530,7 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *attrs[RTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index e83015cecfa7..e4d9560a910b 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); /* Eventually we might send routing messages too */ diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index bf8584339048..f380b2c58178 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -46,7 +46,7 @@ const struct cred *dns_resolver_cache; #define DNS_ERRORNO_OPTION "dnserror" /* - * Instantiate a user defined key for dns_resolver. + * Preparse instantiation data for a dns_resolver key. * * The data must be a NUL-terminated string, with the NUL char accounted in * datalen. @@ -58,17 +58,15 @@ const struct cred *dns_resolver_cache; * "ip1,ip2,...#foo=bar" */ static int -dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) +dns_resolver_preparse(struct key_preparsed_payload *prep) { struct user_key_payload *upayload; unsigned long derrno; int ret; - size_t datalen = prep->datalen, result_len = 0; + int datalen = prep->datalen, result_len = 0; const char *data = prep->data, *end, *opt; - kenter("%%%d,%s,'%*.*s',%zu", - key->serial, key->description, - (int)datalen, (int)datalen, data, datalen); + kenter("'%*.*s',%u", datalen, datalen, data, datalen); if (datalen <= 1 || !data || data[datalen - 1] != '\0') return -EINVAL; @@ -95,8 +93,7 @@ dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) opt_len = next_opt - opt; if (!opt_len) { printk(KERN_WARNING - "Empty option to dns_resolver key %d\n", - key->serial); + "Empty option to dns_resolver key\n"); return -EINVAL; } @@ -125,30 +122,28 @@ dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) goto bad_option_value; kdebug("dns error no. = %lu", derrno); - key->type_data.x[0] = -derrno; + prep->type_data[0] = ERR_PTR(-derrno); continue; } bad_option_value: printk(KERN_WARNING - "Option '%*.*s' to dns_resolver key %d:" + "Option '%*.*s' to dns_resolver key:" " bad/missing value\n", - opt_nlen, opt_nlen, opt, key->serial); + opt_nlen, opt_nlen, opt); return -EINVAL; } while (opt = next_opt + 1, opt < end); } /* don't cache the result if we're caching an error saying there's no * result */ - if (key->type_data.x[0]) { - kleave(" = 0 [h_error %ld]", key->type_data.x[0]); + if (prep->type_data[0]) { + kleave(" = 0 [h_error %ld]", PTR_ERR(prep->type_data[0])); return 0; } kdebug("store result"); - ret = key_payload_reserve(key, result_len); - if (ret < 0) - return -EINVAL; + prep->quotalen = result_len; upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); if (!upayload) { @@ -159,13 +154,23 @@ dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) upayload->datalen = result_len; memcpy(upayload->data, data, result_len); upayload->data[result_len] = '\0'; - rcu_assign_pointer(key->payload.data, upayload); + prep->payload[0] = upayload; kleave(" = 0"); return 0; } /* + * Clean up the preparse data + */ +static void dns_resolver_free_preparse(struct key_preparsed_payload *prep) +{ + pr_devel("==>%s()\n", __func__); + + kfree(prep->payload[0]); +} + +/* * The description is of the form "[<type>:]<domain_name>" * * The domain name may be a simple name or an absolute domain name (which @@ -234,7 +239,9 @@ static long dns_resolver_read(const struct key *key, struct key_type key_type_dns_resolver = { .name = "dns_resolver", - .instantiate = dns_resolver_instantiate, + .preparse = dns_resolver_preparse, + .free_preparse = dns_resolver_free_preparse, + .instantiate = generic_key_instantiate, .match = dns_resolver_match, .revoke = user_revoke, .destroy = user_destroy, diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index e7b6d53eef88..39d2c39bdf87 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -93,8 +93,8 @@ int dns_query(const char *type, const char *name, size_t namelen, } if (!namelen) - namelen = strlen(name); - if (namelen < 3) + namelen = strnlen(name, 256); + if (namelen < 3 || namelen > 255) return -EINVAL; desclen += namelen + 1; @@ -129,6 +129,7 @@ int dns_query(const char *type, const char *name, size_t namelen, } down_read(&rkey->sem); + set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags); rkey->perm |= KEY_USR_VIEW; ret = key_validate(rkey); @@ -149,7 +150,9 @@ int dns_query(const char *type, const char *name, size_t namelen, if (!*_result) goto put; - memcpy(*_result, upayload->data, len + 1); + memcpy(*_result, upayload->data, len); + (*_result)[len] = '\0'; + if (_expiry) *_expiry = rkey->expiry; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0eb5d5e76dfb..0a49632fac47 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -351,8 +351,7 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd) for (i = 0; i < pd->nr_chips; i++) { port_index = 0; while (port_index < DSA_MAX_PORTS) { - if (pd->chip[i].port_names[port_index]) - kfree(pd->chip[i].port_names[port_index]); + kfree(pd->chip[i].port_names[port_index]); port_index++; } kfree(pd->chip[i].rtable); @@ -406,8 +405,9 @@ static int dsa_of_probe(struct platform_device *pdev) goto out_free; } - chip_index = 0; + chip_index = -1; for_each_available_child_of_node(np, child) { + chip_index++; cd = &pd->chip[chip_index]; cd->mii_bus = &mdio_bus->dev; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 02c0e1716f64..45a1e34c89e0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -340,13 +340,13 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, struct dsa_slave_priv *p; int ret; - slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), - name, ether_setup); + slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name, + NET_NAME_UNKNOWN, ether_setup); if (slave_dev == NULL) return slave_dev; slave_dev->features = master->vlan_features; - SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); + slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5dc638cad2e1..f405e0592407 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -390,7 +390,8 @@ EXPORT_SYMBOL(ether_setup); struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs) { - return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs); + return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN, + ether_setup, txqs, rxqs); } EXPORT_SYMBOL(alloc_etherdev_mqs); diff --git a/net/hsr/Makefile b/net/hsr/Makefile index b68359f181cc..9ae972a820f4 100644 --- a/net/hsr/Makefile +++ b/net/hsr/Makefile @@ -4,4 +4,5 @@ obj-$(CONFIG_HSR) += hsr.o -hsr-y := hsr_main.o hsr_framereg.o hsr_device.o hsr_netlink.o +hsr-y := hsr_main.o hsr_framereg.o hsr_device.o \ + hsr_netlink.o hsr_slave.o hsr_forward.o diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e5302b7f7ca9..a138d75751df 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * This file contains device methods for creating, using and destroying * virtual HSR devices. @@ -15,12 +15,13 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> -#include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> #include "hsr_device.h" +#include "hsr_slave.h" #include "hsr_framereg.h" #include "hsr_main.h" +#include "hsr_forward.h" static bool is_admin_up(struct net_device *dev) @@ -45,75 +46,108 @@ static void __hsr_set_operstate(struct net_device *dev, int transition) } } -void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1, - struct net_device *slave2) +static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) { - if (!is_admin_up(hsr_dev)) { - __hsr_set_operstate(hsr_dev, IF_OPER_DOWN); + if (!is_admin_up(master->dev)) { + __hsr_set_operstate(master->dev, IF_OPER_DOWN); return; } - if (is_slave_up(slave1) || is_slave_up(slave2)) - __hsr_set_operstate(hsr_dev, IF_OPER_UP); + if (has_carrier) + __hsr_set_operstate(master->dev, IF_OPER_UP); else - __hsr_set_operstate(hsr_dev, IF_OPER_LOWERLAYERDOWN); + __hsr_set_operstate(master->dev, IF_OPER_LOWERLAYERDOWN); } -void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1, - struct net_device *slave2) +static bool hsr_check_carrier(struct hsr_port *master) { - if (is_slave_up(slave1) || is_slave_up(slave2)) - netif_carrier_on(hsr_dev); + struct hsr_port *port; + bool has_carrier; + + has_carrier = false; + + rcu_read_lock(); + hsr_for_each_port(master->hsr, port) + if ((port->type != HSR_PT_MASTER) && is_slave_up(port->dev)) { + has_carrier = true; + break; + } + rcu_read_unlock(); + + if (has_carrier) + netif_carrier_on(master->dev); else - netif_carrier_off(hsr_dev); + netif_carrier_off(master->dev); + + return has_carrier; } -void hsr_check_announce(struct net_device *hsr_dev, int old_operstate) +static void hsr_check_announce(struct net_device *hsr_dev, + unsigned char old_operstate) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; - hsr_priv = netdev_priv(hsr_dev); + hsr = netdev_priv(hsr_dev); if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) { /* Went up */ - hsr_priv->announce_count = 0; - hsr_priv->announce_timer.expires = jiffies + + hsr->announce_count = 0; + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - add_timer(&hsr_priv->announce_timer); + add_timer(&hsr->announce_timer); } if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) /* Went down */ - del_timer(&hsr_priv->announce_timer); + del_timer(&hsr->announce_timer); } - -int hsr_get_max_mtu(struct hsr_priv *hsr_priv) +void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) { - int mtu_max; - - if (hsr_priv->slave[0] && hsr_priv->slave[1]) - mtu_max = min(hsr_priv->slave[0]->mtu, hsr_priv->slave[1]->mtu); - else if (hsr_priv->slave[0]) - mtu_max = hsr_priv->slave[0]->mtu; - else if (hsr_priv->slave[1]) - mtu_max = hsr_priv->slave[1]->mtu; - else - mtu_max = HSR_TAGLEN; + struct hsr_port *master; + unsigned char old_operstate; + bool has_carrier; - return mtu_max - HSR_TAGLEN; + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + /* netif_stacked_transfer_operstate() cannot be used here since + * it doesn't set IF_OPER_LOWERLAYERDOWN (?) + */ + old_operstate = master->dev->operstate; + has_carrier = hsr_check_carrier(master); + hsr_set_operstate(master, has_carrier); + hsr_check_announce(master->dev, old_operstate); } +int hsr_get_max_mtu(struct hsr_priv *hsr) +{ + unsigned int mtu_max; + struct hsr_port *port; + + mtu_max = ETH_DATA_LEN; + rcu_read_lock(); + hsr_for_each_port(hsr, port) + if (port->type != HSR_PT_MASTER) + mtu_max = min(port->dev->mtu, mtu_max); + rcu_read_unlock(); + + if (mtu_max < HSR_HLEN) + return 0; + return mtu_max - HSR_HLEN; +} + + static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *master; - hsr_priv = netdev_priv(dev); + hsr = netdev_priv(dev); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - if (new_mtu > hsr_get_max_mtu(hsr_priv)) { - netdev_info(hsr_priv->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n", - HSR_TAGLEN); + if (new_mtu > hsr_get_max_mtu(hsr)) { + netdev_info(master->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n", + HSR_HLEN); return -EINVAL; } @@ -124,164 +158,95 @@ static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) static int hsr_dev_open(struct net_device *dev) { - struct hsr_priv *hsr_priv; - int i; - char *slave_name; + struct hsr_priv *hsr; + struct hsr_port *port; + char designation; - hsr_priv = netdev_priv(dev); + hsr = netdev_priv(dev); + designation = '\0'; - for (i = 0; i < HSR_MAX_SLAVE; i++) { - if (hsr_priv->slave[i]) - slave_name = hsr_priv->slave[i]->name; - else - slave_name = "null"; - - if (!is_slave_up(hsr_priv->slave[i])) - netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a working HSR network\n", - 'A' + i, slave_name); + rcu_read_lock(); + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + designation = 'A'; + break; + case HSR_PT_SLAVE_B: + designation = 'B'; + break; + default: + designation = '?'; + } + if (!is_slave_up(port->dev)) + netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n", + designation, port->dev->name); } + rcu_read_unlock(); + + if (designation == '\0') + netdev_warn(dev, "No slave devices configured\n"); return 0; } + static int hsr_dev_close(struct net_device *dev) { - /* Nothing to do here. We could try to restore the state of the slaves - * to what they were before being changed by the hsr master dev's state, - * but they might have been changed manually in the mean time too, so - * taking them up or down here might be confusing and is probably not a - * good idea. - */ + /* Nothing to do here. */ return 0; } -static void hsr_fill_tag(struct hsr_ethhdr *hsr_ethhdr, struct hsr_priv *hsr_priv) +static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr, + netdev_features_t features) { - unsigned long irqflags; + netdev_features_t mask; + struct hsr_port *port; - /* IEC 62439-1:2010, p 48, says the 4-bit "path" field can take values - * between 0001-1001 ("ring identifier", for regular HSR frames), - * or 1111 ("HSR management", supervision frames). Unfortunately, the - * spec writers forgot to explain what a "ring identifier" is, or - * how it is used. So we just set this to 0001 for regular frames, - * and 1111 for supervision frames. - */ - set_hsr_tag_path(&hsr_ethhdr->hsr_tag, 0x1); + mask = features; - /* IEC 62439-1:2010, p 12: "The link service data unit in an Ethernet - * frame is the content of the frame located between the Length/Type - * field and the Frame Check Sequence." + /* Mask out all features that, if supported by one device, should be + * enabled for all devices (see NETIF_F_ONE_FOR_ALL). * - * IEC 62439-3, p 48, specifies the "original LPDU" to include the - * original "LT" field (what "LT" means is not explained anywhere as - * far as I can see - perhaps "Length/Type"?). So LSDU_size might - * equal original length + 2. - * Also, the fact that this field is not used anywhere (might be used - * by a RedBox connecting HSR and PRP nets?) means I cannot test its - * correctness. Instead of guessing, I set this to 0 here, to make any - * problems immediately apparent. Anyone using this driver with PRP/HSR - * RedBoxes might need to fix this... + * Anything that's off in mask will not be enabled - so only things + * that were in features originally, and also is in NETIF_F_ONE_FOR_ALL, + * may become enabled. */ - set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, 0); - - spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags); - hsr_ethhdr->hsr_tag.sequence_nr = htons(hsr_priv->sequence_nr); - hsr_priv->sequence_nr++; - spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags); + features &= ~NETIF_F_ONE_FOR_ALL; + hsr_for_each_port(hsr, port) + features = netdev_increment_features(features, + port->dev->features, + mask); - hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; - - hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); + return features; } -static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv, - enum hsr_dev_idx dev_idx) +static netdev_features_t hsr_fix_features(struct net_device *dev, + netdev_features_t features) { - struct hsr_ethhdr *hsr_ethhdr; - - hsr_ethhdr = (struct hsr_ethhdr *) skb->data; + struct hsr_priv *hsr = netdev_priv(dev); - skb->dev = hsr_priv->slave[dev_idx]; - - hsr_addr_subst_dest(hsr_priv, &hsr_ethhdr->ethhdr, dev_idx); - - /* Address substitution (IEC62439-3 pp 26, 50): replace mac - * address of outgoing frame with that of the outgoing slave's. - */ - ether_addr_copy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr); - - return dev_queue_xmit(skb); + return hsr_features_recompute(hsr, features); } static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) { - struct hsr_priv *hsr_priv; - struct hsr_ethhdr *hsr_ethhdr; - struct sk_buff *skb2; - int res1, res2; - - hsr_priv = netdev_priv(dev); - hsr_ethhdr = (struct hsr_ethhdr *) skb->data; - - if ((skb->protocol != htons(ETH_P_PRP)) || - (hsr_ethhdr->ethhdr.h_proto != htons(ETH_P_PRP))) { - hsr_fill_tag(hsr_ethhdr, hsr_priv); - skb->protocol = htons(ETH_P_PRP); - } - - skb2 = pskb_copy(skb, GFP_ATOMIC); - - res1 = NET_XMIT_DROP; - if (likely(hsr_priv->slave[HSR_DEV_SLAVE_A])) - res1 = slave_xmit(skb, hsr_priv, HSR_DEV_SLAVE_A); + struct hsr_priv *hsr = netdev_priv(dev); + struct hsr_port *master; - res2 = NET_XMIT_DROP; - if (likely(skb2 && hsr_priv->slave[HSR_DEV_SLAVE_B])) - res2 = slave_xmit(skb2, hsr_priv, HSR_DEV_SLAVE_B); - - if (likely(res1 == NET_XMIT_SUCCESS || res1 == NET_XMIT_CN || - res2 == NET_XMIT_SUCCESS || res2 == NET_XMIT_CN)) { - hsr_priv->dev->stats.tx_packets++; - hsr_priv->dev->stats.tx_bytes += skb->len; - } else { - hsr_priv->dev->stats.tx_dropped++; - } + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + skb->dev = master->dev; + hsr_forward_skb(skb, master); return NETDEV_TX_OK; } -static int hsr_header_create(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) -{ - int res; - - /* Make room for the HSR tag now. We will fill it in later (in - * hsr_dev_xmit) - */ - if (skb_headroom(skb) < HSR_TAGLEN + ETH_HLEN) - return -ENOBUFS; - skb_push(skb, HSR_TAGLEN); - - /* To allow VLAN/HSR combos we should probably use - * res = dev_hard_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN); - * here instead. It would require other changes too, though - e.g. - * separate headers for each slave etc... - */ - res = eth_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN); - if (res <= 0) - return res; - skb_reset_mac_header(skb); - - return res + HSR_TAGLEN; -} - - static const struct header_ops hsr_header_ops = { - .create = hsr_header_create, + .create = eth_header, .parse = eth_header_parse, }; @@ -291,67 +256,63 @@ static const struct header_ops hsr_header_ops = { */ static int hsr_pad(int size) { - const int min_size = ETH_ZLEN - HSR_TAGLEN - ETH_HLEN; + const int min_size = ETH_ZLEN - HSR_HLEN - ETH_HLEN; if (size >= min_size) return size; return min_size; } -static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type) +static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) { - struct hsr_priv *hsr_priv; struct sk_buff *skb; int hlen, tlen; struct hsr_sup_tag *hsr_stag; struct hsr_sup_payload *hsr_sp; unsigned long irqflags; - hlen = LL_RESERVED_SPACE(hsr_dev); - tlen = hsr_dev->needed_tailroom; + hlen = LL_RESERVED_SPACE(master->dev); + tlen = master->dev->needed_tailroom; skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen, GFP_ATOMIC); if (skb == NULL) return; - hsr_priv = netdev_priv(hsr_dev); - skb_reserve(skb, hlen); - skb->dev = hsr_dev; + skb->dev = master->dev; skb->protocol = htons(ETH_P_PRP); skb->priority = TC_PRIO_CONTROL; if (dev_hard_header(skb, skb->dev, ETH_P_PRP, - hsr_priv->sup_multicast_addr, - skb->dev->dev_addr, skb->len) < 0) + master->hsr->sup_multicast_addr, + skb->dev->dev_addr, skb->len) <= 0) goto out; + skb_reset_mac_header(skb); - skb_pull(skb, sizeof(struct ethhdr)); - hsr_stag = (typeof(hsr_stag)) skb->data; + hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(*hsr_stag)); set_hsr_stag_path(hsr_stag, 0xf); set_hsr_stag_HSR_Ver(hsr_stag, 0); - spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags); - hsr_stag->sequence_nr = htons(hsr_priv->sequence_nr); - hsr_priv->sequence_nr++; - spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags); + spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); + hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); + master->hsr->sequence_nr++; + spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); hsr_stag->HSR_TLV_Type = type; hsr_stag->HSR_TLV_Length = 12; - skb_push(skb, sizeof(struct ethhdr)); - /* Payload: MacAddressA */ hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp)); - ether_addr_copy(hsr_sp->MacAddressA, hsr_dev->dev_addr); + ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); - dev_queue_xmit(skb); + hsr_forward_skb(skb, master); return; out: + WARN_ON_ONCE("HSR: Could not send supervision frame\n"); kfree_skb(skb); } @@ -360,59 +321,32 @@ out: */ static void hsr_announce(unsigned long data) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *master; - hsr_priv = (struct hsr_priv *) data; + hsr = (struct hsr_priv *) data; - if (hsr_priv->announce_count < 3) { - send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_ANNOUNCE); - hsr_priv->announce_count++; + rcu_read_lock(); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + + if (hsr->announce_count < 3) { + send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE); + hsr->announce_count++; } else { - send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_LIFE_CHECK); + send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK); } - if (hsr_priv->announce_count < 3) - hsr_priv->announce_timer.expires = jiffies + + if (hsr->announce_count < 3) + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); else - hsr_priv->announce_timer.expires = jiffies + + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); - if (is_admin_up(hsr_priv->dev)) - add_timer(&hsr_priv->announce_timer); -} - - -static void restore_slaves(struct net_device *hsr_dev) -{ - struct hsr_priv *hsr_priv; - int i; - int res; - - hsr_priv = netdev_priv(hsr_dev); - - rtnl_lock(); - - /* Restore promiscuity */ - for (i = 0; i < HSR_MAX_SLAVE; i++) { - if (!hsr_priv->slave[i]) - continue; - res = dev_set_promiscuity(hsr_priv->slave[i], -1); - if (res) - netdev_info(hsr_dev, - "Cannot restore slave promiscuity (%s, %d)\n", - hsr_priv->slave[i]->name, res); - } - - rtnl_unlock(); -} - -static void reclaim_hsr_dev(struct rcu_head *rh) -{ - struct hsr_priv *hsr_priv; + if (is_admin_up(master->dev)) + add_timer(&hsr->announce_timer); - hsr_priv = container_of(rh, struct hsr_priv, rcu_head); - free_netdev(hsr_priv->dev); + rcu_read_unlock(); } @@ -421,14 +355,18 @@ static void reclaim_hsr_dev(struct rcu_head *rh) */ static void hsr_dev_destroy(struct net_device *hsr_dev) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *port; - hsr_priv = netdev_priv(hsr_dev); + hsr = netdev_priv(hsr_dev); + hsr_for_each_port(hsr, port) + hsr_del_port(port); - del_timer(&hsr_priv->announce_timer); - unregister_hsr_master(hsr_priv); /* calls list_del_rcu on hsr_priv */ - restore_slaves(hsr_dev); - call_rcu(&hsr_priv->rcu_head, reclaim_hsr_dev); /* reclaim hsr_priv */ + del_timer_sync(&hsr->prune_timer); + del_timer_sync(&hsr->announce_timer); + + synchronize_rcu(); + free_netdev(hsr_dev); } static const struct net_device_ops hsr_device_ops = { @@ -436,62 +374,51 @@ static const struct net_device_ops hsr_device_ops = { .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, + .ndo_fix_features = hsr_fix_features, }; +static struct device_type hsr_type = { + .name = "hsr", +}; void hsr_dev_setup(struct net_device *dev) { random_ether_addr(dev->dev_addr); ether_setup(dev); - dev->header_ops = &hsr_header_ops; - dev->netdev_ops = &hsr_device_ops; - dev->tx_queue_len = 0; + dev->header_ops = &hsr_header_ops; + dev->netdev_ops = &hsr_device_ops; + SET_NETDEV_DEVTYPE(dev, &hsr_type); + dev->tx_queue_len = 0; dev->destructor = hsr_dev_destroy; + + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_CTAG_TX; + + dev->features = dev->hw_features; + + /* Prevent recursive tx locking */ + dev->features |= NETIF_F_LLTX; + /* VLAN on top of HSR needs testing and probably some work on + * hsr_header_create() etc. + */ + dev->features |= NETIF_F_VLAN_CHALLENGED; + /* Not sure about this. Taken from bridge code. netdev_features.h says + * it means "Does not change network namespaces". + */ + dev->features |= NETIF_F_NETNS_LOCAL; } /* Return true if dev is a HSR master; return false otherwise. */ -bool is_hsr_master(struct net_device *dev) +inline bool is_hsr_master(struct net_device *dev) { return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit); } -static int check_slave_ok(struct net_device *dev) -{ - /* Don't allow HSR on non-ethernet like devices */ - if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) || - (dev->addr_len != ETH_ALEN)) { - netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n"); - return -EINVAL; - } - - /* Don't allow enslaving hsr devices */ - if (is_hsr_master(dev)) { - netdev_info(dev, "Cannot create trees of HSR devices.\n"); - return -EINVAL; - } - - if (is_hsr_slave(dev)) { - netdev_info(dev, "This device is already a HSR slave.\n"); - return -EINVAL; - } - - if (dev->priv_flags & IFF_802_1Q_VLAN) { - netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n"); - return -EINVAL; - } - - /* HSR over bonded devices has not been tested, but I'm not sure it - * won't work... - */ - - return 0; -} - - /* Default multicast address for HSR Supervision frames */ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x15, 0x4e, 0x00, 0x01, 0x00 @@ -500,97 +427,74 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec) { - struct hsr_priv *hsr_priv; - int i; + struct hsr_priv *hsr; + struct hsr_port *port; int res; - hsr_priv = netdev_priv(hsr_dev); - hsr_priv->dev = hsr_dev; - INIT_LIST_HEAD(&hsr_priv->node_db); - INIT_LIST_HEAD(&hsr_priv->self_node_db); - for (i = 0; i < HSR_MAX_SLAVE; i++) - hsr_priv->slave[i] = slave[i]; - - spin_lock_init(&hsr_priv->seqnr_lock); - /* Overflow soon to find bugs easier: */ - hsr_priv->sequence_nr = USHRT_MAX - 1024; - - init_timer(&hsr_priv->announce_timer); - hsr_priv->announce_timer.function = hsr_announce; - hsr_priv->announce_timer.data = (unsigned long) hsr_priv; + hsr = netdev_priv(hsr_dev); + INIT_LIST_HEAD(&hsr->ports); + INIT_LIST_HEAD(&hsr->node_db); + INIT_LIST_HEAD(&hsr->self_node_db); - ether_addr_copy(hsr_priv->sup_multicast_addr, def_multicast_addr); - hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; + ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr); -/* FIXME: should I modify the value of these? - * - * - hsr_dev->flags - i.e. - * IFF_MASTER/SLAVE? - * - hsr_dev->priv_flags - i.e. - * IFF_EBRIDGE? - * IFF_TX_SKB_SHARING? - * IFF_HSR_MASTER/SLAVE? - */ + /* Make sure we recognize frames from ourselves in hsr_rcv() */ + res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr, + slave[1]->dev_addr); + if (res < 0) + return res; - for (i = 0; i < HSR_MAX_SLAVE; i++) { - res = check_slave_ok(slave[i]); - if (res) - return res; - } + spin_lock_init(&hsr->seqnr_lock); + /* Overflow soon to find bugs easier: */ + hsr->sequence_nr = HSR_SEQNR_START; - hsr_dev->features = slave[0]->features & slave[1]->features; - /* Prevent recursive tx locking */ - hsr_dev->features |= NETIF_F_LLTX; - /* VLAN on top of HSR needs testing and probably some work on - * hsr_header_create() etc. - */ - hsr_dev->features |= NETIF_F_VLAN_CHALLENGED; + init_timer(&hsr->announce_timer); + hsr->announce_timer.function = hsr_announce; + hsr->announce_timer.data = (unsigned long) hsr; - /* Set hsr_dev's MAC address to that of mac_slave1 */ - ether_addr_copy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr); + init_timer(&hsr->prune_timer); + hsr->prune_timer.function = hsr_prune_nodes; + hsr->prune_timer.data = (unsigned long) hsr; - /* Set required header length */ - for (i = 0; i < HSR_MAX_SLAVE; i++) { - if (slave[i]->hard_header_len + HSR_TAGLEN > - hsr_dev->hard_header_len) - hsr_dev->hard_header_len = - slave[i]->hard_header_len + HSR_TAGLEN; - } + ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); + hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; - /* MTU */ - for (i = 0; i < HSR_MAX_SLAVE; i++) - if (slave[i]->mtu - HSR_TAGLEN < hsr_dev->mtu) - hsr_dev->mtu = slave[i]->mtu - HSR_TAGLEN; + /* FIXME: should I modify the value of these? + * + * - hsr_dev->flags - i.e. + * IFF_MASTER/SLAVE? + * - hsr_dev->priv_flags - i.e. + * IFF_EBRIDGE? + * IFF_TX_SKB_SHARING? + * IFF_HSR_MASTER/SLAVE? + */ /* Make sure the 1st call to netif_carrier_on() gets through */ netif_carrier_off(hsr_dev); - /* Promiscuity */ - for (i = 0; i < HSR_MAX_SLAVE; i++) { - res = dev_set_promiscuity(slave[i], 1); - if (res) { - netdev_info(hsr_dev, "Cannot set slave promiscuity (%s, %d)\n", - slave[i]->name, res); - goto fail; - } - } + res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); + if (res) + return res; - /* Make sure we recognize frames from ourselves in hsr_rcv() */ - res = hsr_create_self_node(&hsr_priv->self_node_db, - hsr_dev->dev_addr, - hsr_priv->slave[1]->dev_addr); - if (res < 0) + res = register_netdevice(hsr_dev); + if (res) goto fail; - res = register_netdevice(hsr_dev); + res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A); + if (res) + goto fail; + res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B); if (res) goto fail; - register_hsr_master(hsr_priv); + hsr->prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); + add_timer(&hsr->prune_timer); return 0; fail: - restore_slaves(hsr_dev); + hsr_for_each_port(hsr, port) + hsr_del_port(port); + return res; } diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 2c7148e73914..108a5d59d2a6 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ #ifndef __HSR_DEVICE_H @@ -18,12 +18,8 @@ void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec); -void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1, - struct net_device *slave2); -void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1, - struct net_device *slave2); -void hsr_check_announce(struct net_device *hsr_dev, int old_operstate); +void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); -int hsr_get_max_mtu(struct hsr_priv *hsr_priv); +int hsr_get_max_mtu(struct hsr_priv *hsr); #endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c new file mode 100644 index 000000000000..7871ed6d3825 --- /dev/null +++ b/net/hsr/hsr_forward.c @@ -0,0 +1,368 @@ +/* Copyright 2011-2014 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + */ + +#include "hsr_forward.h" +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_vlan.h> +#include "hsr_main.h" +#include "hsr_framereg.h" + + +struct hsr_node; + +struct hsr_frame_info { + struct sk_buff *skb_std; + struct sk_buff *skb_hsr; + struct hsr_port *port_rcv; + struct hsr_node *node_src; + u16 sequence_nr; + bool is_supervision; + bool is_vlan; + bool is_local_dest; + bool is_local_exclusive; +}; + + +/* The uses I can see for these HSR supervision frames are: + * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = + * 22") to reset any sequence_nr counters belonging to that node. Useful if + * the other node's counter has been reset for some reason. + * -- + * Or not - resetting the counter and bridging the frame would create a + * loop, unfortunately. + * + * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck + * frame is received from a particular node, we know something is wrong. + * We just register these (as with normal frames) and throw them away. + * + * 3) Allow different MAC addresses for the two slave interfaces, using the + * MacAddressA field. + */ +static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) +{ + struct hsr_ethhdr_sp *hdr; + + WARN_ON_ONCE(!skb_mac_header_was_set(skb)); + hdr = (struct hsr_ethhdr_sp *) skb_mac_header(skb); + + if (!ether_addr_equal(hdr->ethhdr.h_dest, + hsr->sup_multicast_addr)) + return false; + + if (get_hsr_stag_path(&hdr->hsr_sup) != 0x0f) + return false; + if ((hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_ANNOUNCE) && + (hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) + return false; + if (hdr->hsr_sup.HSR_TLV_Length != 12) + return false; + + return true; +} + + +static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in, + struct hsr_frame_info *frame) +{ + struct sk_buff *skb; + int copylen; + unsigned char *dst, *src; + + skb_pull(skb_in, HSR_HLEN); + skb = __pskb_copy(skb_in, skb_headroom(skb_in) - HSR_HLEN, GFP_ATOMIC); + skb_push(skb_in, HSR_HLEN); + if (skb == NULL) + return NULL; + + skb_reset_mac_header(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start -= HSR_HLEN; + + copylen = 2*ETH_ALEN; + if (frame->is_vlan) + copylen += VLAN_HLEN; + src = skb_mac_header(skb_in); + dst = skb_mac_header(skb); + memcpy(dst, src, copylen); + + skb->protocol = eth_hdr(skb)->h_proto; + return skb; +} + +static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, + struct hsr_port *port) +{ + if (!frame->skb_std) + frame->skb_std = create_stripped_skb(frame->skb_hsr, frame); + return skb_clone(frame->skb_std, GFP_ATOMIC); +} + + +static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, + struct hsr_port *port) +{ + struct hsr_ethhdr *hsr_ethhdr; + int lane_id; + int lsdu_size; + + if (port->type == HSR_PT_SLAVE_A) + lane_id = 0; + else + lane_id = 1; + + lsdu_size = skb->len - 14; + if (frame->is_vlan) + lsdu_size -= 4; + + hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); + + set_hsr_tag_path(&hsr_ethhdr->hsr_tag, lane_id); + set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); + hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); + hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; + hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); +} + +static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, + struct hsr_frame_info *frame, + struct hsr_port *port) +{ + int movelen; + unsigned char *dst, *src; + struct sk_buff *skb; + + /* Create the new skb with enough headroom to fit the HSR tag */ + skb = __pskb_copy(skb_o, skb_headroom(skb_o) + HSR_HLEN, GFP_ATOMIC); + if (skb == NULL) + return NULL; + skb_reset_mac_header(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start += HSR_HLEN; + + movelen = ETH_HLEN; + if (frame->is_vlan) + movelen += VLAN_HLEN; + + src = skb_mac_header(skb); + dst = skb_push(skb, HSR_HLEN); + memmove(dst, src, movelen); + skb_reset_mac_header(skb); + + hsr_fill_tag(skb, frame, port); + + return skb; +} + +/* If the original frame was an HSR tagged frame, just clone it to be sent + * unchanged. Otherwise, create a private frame especially tagged for 'port'. + */ +static struct sk_buff *frame_get_tagged_skb(struct hsr_frame_info *frame, + struct hsr_port *port) +{ + if (frame->skb_hsr) + return skb_clone(frame->skb_hsr, GFP_ATOMIC); + + if ((port->type != HSR_PT_SLAVE_A) && (port->type != HSR_PT_SLAVE_B)) { + WARN_ONCE(1, "HSR: Bug: trying to create a tagged frame for a non-ring port"); + return NULL; + } + + return create_tagged_skb(frame->skb_std, frame, port); +} + + +static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, + struct hsr_node *node_src) +{ + bool was_multicast_frame; + int res; + + was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); + hsr_addr_subst_source(node_src, skb); + skb_pull(skb, ETH_HLEN); + res = netif_rx(skb); + if (res == NET_RX_DROP) { + dev->stats.rx_dropped++; + } else { + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + if (was_multicast_frame) + dev->stats.multicast++; + } +} + +static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port, + struct hsr_frame_info *frame) +{ + if (frame->port_rcv->type == HSR_PT_MASTER) { + hsr_addr_subst_dest(frame->node_src, skb, port); + + /* Address substitution (IEC62439-3 pp 26, 50): replace mac + * address of outgoing frame with that of the outgoing slave's. + */ + ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr); + } + return dev_queue_xmit(skb); +} + + +/* Forward the frame through all devices except: + * - Back through the receiving device + * - If it's a HSR frame: through a device where it has passed before + * - To the local HSR master only if the frame is directly addressed to it, or + * a non-supervision multicast or broadcast frame. + * + * HSR slave devices should insert a HSR tag into the frame, or forward the + * frame unchanged if it's already tagged. Interlink devices should strip HSR + * tags if they're of the non-HSR type (but only after duplicate discard). The + * master device always strips HSR tags. + */ +static void hsr_forward_do(struct hsr_frame_info *frame) +{ + struct hsr_port *port; + struct sk_buff *skb; + + hsr_for_each_port(frame->port_rcv->hsr, port) { + /* Don't send frame back the way it came */ + if (port == frame->port_rcv) + continue; + + /* Don't deliver locally unless we should */ + if ((port->type == HSR_PT_MASTER) && !frame->is_local_dest) + continue; + + /* Deliver frames directly addressed to us to master only */ + if ((port->type != HSR_PT_MASTER) && frame->is_local_exclusive) + continue; + + /* Don't send frame over port where it has been sent before */ + if (hsr_register_frame_out(port, frame->node_src, + frame->sequence_nr)) + continue; + + if (frame->is_supervision && (port->type == HSR_PT_MASTER)) { + hsr_handle_sup_frame(frame->skb_hsr, + frame->node_src, + frame->port_rcv); + continue; + } + + if (port->type != HSR_PT_MASTER) + skb = frame_get_tagged_skb(frame, port); + else + skb = frame_get_stripped_skb(frame, port); + if (skb == NULL) { + /* FIXME: Record the dropped frame? */ + continue; + } + + skb->dev = port->dev; + if (port->type == HSR_PT_MASTER) + hsr_deliver_master(skb, port->dev, frame->node_src); + else + hsr_xmit(skb, port, frame); + } +} + + +static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, + struct hsr_frame_info *frame) +{ + struct net_device *master_dev; + + master_dev = hsr_port_get_hsr(hsr, HSR_PT_MASTER)->dev; + + if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) { + frame->is_local_exclusive = true; + skb->pkt_type = PACKET_HOST; + } else { + frame->is_local_exclusive = false; + } + + if ((skb->pkt_type == PACKET_HOST) || + (skb->pkt_type == PACKET_MULTICAST) || + (skb->pkt_type == PACKET_BROADCAST)) { + frame->is_local_dest = true; + } else { + frame->is_local_dest = false; + } +} + + +static int hsr_fill_frame_info(struct hsr_frame_info *frame, + struct sk_buff *skb, struct hsr_port *port) +{ + struct ethhdr *ethhdr; + unsigned long irqflags; + + frame->is_supervision = is_supervision_frame(port->hsr, skb); + frame->node_src = hsr_get_node(&port->hsr->node_db, skb, + frame->is_supervision); + if (frame->node_src == NULL) + return -1; /* Unknown node and !is_supervision, or no mem */ + + ethhdr = (struct ethhdr *) skb_mac_header(skb); + frame->is_vlan = false; + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { + frame->is_vlan = true; + /* FIXME: */ + WARN_ONCE(1, "HSR: VLAN not yet supported"); + } + if (ethhdr->h_proto == htons(ETH_P_PRP)) { + frame->skb_std = NULL; + frame->skb_hsr = skb; + frame->sequence_nr = hsr_get_skb_sequence_nr(skb); + } else { + frame->skb_std = skb; + frame->skb_hsr = NULL; + /* Sequence nr for the master node */ + spin_lock_irqsave(&port->hsr->seqnr_lock, irqflags); + frame->sequence_nr = port->hsr->sequence_nr; + port->hsr->sequence_nr++; + spin_unlock_irqrestore(&port->hsr->seqnr_lock, irqflags); + } + + frame->port_rcv = port; + check_local_dest(port->hsr, skb, frame); + + return 0; +} + +/* Must be called holding rcu read lock (because of the port parameter) */ +void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) +{ + struct hsr_frame_info frame; + + if (skb_mac_header(skb) != skb->data) { + WARN_ONCE(1, "%s:%d: Malformed frame (port_src %s)\n", + __FILE__, __LINE__, port->dev->name); + goto out_drop; + } + + if (hsr_fill_frame_info(&frame, skb, port) < 0) + goto out_drop; + hsr_register_frame_in(frame.node_src, port, frame.sequence_nr); + hsr_forward_do(&frame); + + if (frame.skb_hsr != NULL) + kfree_skb(frame.skb_hsr); + if (frame.skb_std != NULL) + kfree_skb(frame.skb_std); + return; + +out_drop: + port->dev->stats.tx_dropped++; + kfree_skb(skb); +} diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h new file mode 100644 index 000000000000..5c5bc4b6b75f --- /dev/null +++ b/net/hsr/hsr_forward.h @@ -0,0 +1,20 @@ +/* Copyright 2011-2014 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + */ + +#ifndef __HSR_FORWARD_H +#define __HSR_FORWARD_H + +#include <linux/netdevice.h> +#include "hsr_main.h" + +void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port); + +#endif /* __HSR_FORWARD_H */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 83e58449366a..bace124d14ef 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * The HSR spec says never to forward the same frame twice on the same * interface. A frame is identified by its source MAC address and its HSR @@ -23,71 +23,68 @@ #include "hsr_netlink.h" -struct node_entry { - struct list_head mac_list; - unsigned char MacAddressA[ETH_ALEN]; - unsigned char MacAddressB[ETH_ALEN]; - enum hsr_dev_idx AddrB_if; /* The local slave through which AddrB - * frames are received from this node - */ - unsigned long time_in[HSR_MAX_SLAVE]; - bool time_in_stale[HSR_MAX_SLAVE]; - u16 seq_out[HSR_MAX_DEV]; - struct rcu_head rcu_head; +struct hsr_node { + struct list_head mac_list; + unsigned char MacAddressA[ETH_ALEN]; + unsigned char MacAddressB[ETH_ALEN]; + /* Local slave through which AddrB frames are received from this node */ + enum hsr_port_type AddrB_port; + unsigned long time_in[HSR_PT_PORTS]; + bool time_in_stale[HSR_PT_PORTS]; + u16 seq_out[HSR_PT_PORTS]; + struct rcu_head rcu_head; }; -/* TODO: use hash lists for mac addresses (linux/jhash.h)? */ +/* TODO: use hash lists for mac addresses (linux/jhash.h)? */ -/* Search for mac entry. Caller must hold rcu read lock. +/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, + * false otherwise. */ -static struct node_entry *find_node_by_AddrA(struct list_head *node_db, - const unsigned char addr[ETH_ALEN]) +static bool seq_nr_after(u16 a, u16 b) { - struct node_entry *node; - - list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->MacAddressA, addr)) - return node; - } + /* Remove inconsistency where + * seq_nr_after(a, b) == seq_nr_before(a, b) + */ + if ((int) b - a == 32768) + return false; - return NULL; + return (((s16) (b - a)) < 0); } +#define seq_nr_before(a, b) seq_nr_after((b), (a)) +#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b))) +#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) -/* Search for mac entry. Caller must hold rcu read lock. - */ -static struct node_entry *find_node_by_AddrB(struct list_head *node_db, - const unsigned char addr[ETH_ALEN]) +bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { - struct node_entry *node; + struct hsr_node *node; - list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->MacAddressB, addr)) - return node; + node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node, + mac_list); + if (!node) { + WARN_ONCE(1, "HSR: No self node\n"); + return false; } - return NULL; -} + if (ether_addr_equal(addr, node->MacAddressA)) + return true; + if (ether_addr_equal(addr, node->MacAddressB)) + return true; + return false; +} /* Search for mac entry. Caller must hold rcu read lock. */ -struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb) +static struct hsr_node *find_node_by_AddrA(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) { - struct node_entry *node; - struct ethhdr *ethhdr; - - if (!skb_mac_header_was_set(skb)) - return NULL; - - ethhdr = (struct ethhdr *) skb_mac_header(skb); + struct hsr_node *node; list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->MacAddressA, ethhdr->h_source)) - return node; - if (ether_addr_equal(node->MacAddressB, ethhdr->h_source)) + if (ether_addr_equal(node->MacAddressA, addr)) return node; } @@ -102,7 +99,7 @@ int hsr_create_self_node(struct list_head *self_node_db, unsigned char addr_a[ETH_ALEN], unsigned char addr_b[ETH_ALEN]) { - struct node_entry *node, *oldnode; + struct hsr_node *node, *oldnode; node = kmalloc(sizeof(*node), GFP_KERNEL); if (!node) @@ -113,7 +110,7 @@ int hsr_create_self_node(struct list_head *self_node_db, rcu_read_lock(); oldnode = list_first_or_null_rcu(self_node_db, - struct node_entry, mac_list); + struct hsr_node, mac_list); if (oldnode) { list_replace_rcu(&oldnode->mac_list, &node->mac_list); rcu_read_unlock(); @@ -128,135 +125,144 @@ int hsr_create_self_node(struct list_head *self_node_db, } -/* Add/merge node to the database of nodes. 'skb' must contain an HSR - * supervision frame. - * - If the supervision header's MacAddressA field is not yet in the database, - * this frame is from an hitherto unknown node - add it to the database. - * - If the sender's MAC address is not the same as its MacAddressA address, - * the node is using PICS_SUBS (address substitution). Record the sender's - * address as the node's MacAddressB. - * - * This function needs to work even if the sender node has changed one of its - * slaves' MAC addresses. In this case, there are four different cases described - * by (Addr-changed, received-from) pairs as follows. Note that changing the - * SlaveA address is equal to changing the node's own address: - * - * - (AddrB, SlaveB): The new AddrB will be recorded by PICS_SUBS code since - * node == NULL. - * - (AddrB, SlaveA): Will work as usual (the AddrB change won't be detected - * from this frame). - * - * - (AddrA, SlaveB): The old node will be found. We need to detect this and - * remove the node. - * - (AddrA, SlaveA): A new node will be registered (non-PICS_SUBS at first). - * The old one will be pruned after HSR_NODE_FORGET_TIME. - * - * We also need to detect if the sender's SlaveA and SlaveB cables have been - * swapped. +/* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA; + * seq_out is used to initialize filtering of outgoing duplicate frames + * originating from the newly added node. */ -struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, - struct node_entry *node, - struct sk_buff *skb, - enum hsr_dev_idx dev_idx) +struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], + u16 seq_out) { - struct hsr_sup_payload *hsr_sp; - struct hsr_ethhdr_sp *hsr_ethsup; - int i; + struct hsr_node *node; unsigned long now; - - hsr_ethsup = (struct hsr_ethhdr_sp *) skb_mac_header(skb); - hsr_sp = (struct hsr_sup_payload *) skb->data; - - if (node && !ether_addr_equal(node->MacAddressA, hsr_sp->MacAddressA)) { - /* Node has changed its AddrA, frame was received from SlaveB */ - list_del_rcu(&node->mac_list); - kfree_rcu(node, rcu_head); - node = NULL; - } - - if (node && (dev_idx == node->AddrB_if) && - !ether_addr_equal(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) { - /* Cables have been swapped */ - list_del_rcu(&node->mac_list); - kfree_rcu(node, rcu_head); - node = NULL; - } - - if (node && (dev_idx != node->AddrB_if) && - (node->AddrB_if != HSR_DEV_NONE) && - !ether_addr_equal(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) { - /* Cables have been swapped */ - list_del_rcu(&node->mac_list); - kfree_rcu(node, rcu_head); - node = NULL; - } - - if (node) - return node; - - node = find_node_by_AddrA(&hsr_priv->node_db, hsr_sp->MacAddressA); - if (node) { - /* Node is known, but frame was received from an unknown - * address. Node is PICS_SUBS capable; merge its AddrB. - */ - ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source); - node->AddrB_if = dev_idx; - return node; - } + int i; node = kzalloc(sizeof(*node), GFP_ATOMIC); if (!node) return NULL; - ether_addr_copy(node->MacAddressA, hsr_sp->MacAddressA); - ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source); - if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source)) - node->AddrB_if = dev_idx; - else - node->AddrB_if = HSR_DEV_NONE; + ether_addr_copy(node->MacAddressA, addr); /* We are only interested in time diffs here, so use current jiffies * as initialization. (0 could trigger an spurious ring error warning). */ now = jiffies; - for (i = 0; i < HSR_MAX_SLAVE; i++) + for (i = 0; i < HSR_PT_PORTS; i++) node->time_in[i] = now; - for (i = 0; i < HSR_MAX_DEV; i++) - node->seq_out[i] = ntohs(hsr_ethsup->hsr_sup.sequence_nr) - 1; + for (i = 0; i < HSR_PT_PORTS; i++) + node->seq_out[i] = seq_out; - list_add_tail_rcu(&node->mac_list, &hsr_priv->node_db); + list_add_tail_rcu(&node->mac_list, node_db); return node; } +/* Get the hsr_node from which 'skb' was sent. + */ +struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, + bool is_sup) +{ + struct hsr_node *node; + struct ethhdr *ethhdr; + u16 seq_out; + + if (!skb_mac_header_was_set(skb)) + return NULL; + + ethhdr = (struct ethhdr *) skb_mac_header(skb); + + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->MacAddressA, ethhdr->h_source)) + return node; + if (ether_addr_equal(node->MacAddressB, ethhdr->h_source)) + return node; + } + + if (!is_sup) + return NULL; /* Only supervision frame may create node entry */ + + if (ethhdr->h_proto == htons(ETH_P_PRP)) { + /* Use the existing sequence_nr from the tag as starting point + * for filtering duplicate frames. + */ + seq_out = hsr_get_skb_sequence_nr(skb) - 1; + } else { + WARN_ONCE(1, "%s: Non-HSR frame\n", __func__); + seq_out = 0; + } + + return hsr_add_node(node_db, ethhdr->h_source, seq_out); +} + +/* Use the Supervision frame's info about an eventual MacAddressB for merging + * nodes that has previously had their MacAddressB registered as a separate + * node. + */ +void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, + struct hsr_port *port_rcv) +{ + struct hsr_node *node_real; + struct hsr_sup_payload *hsr_sp; + struct list_head *node_db; + int i; + + skb_pull(skb, sizeof(struct hsr_ethhdr_sp)); + hsr_sp = (struct hsr_sup_payload *) skb->data; + + if (ether_addr_equal(eth_hdr(skb)->h_source, hsr_sp->MacAddressA)) + /* Not sent from MacAddressB of a PICS_SUBS capable node */ + goto done; + + /* Merge node_curr (registered on MacAddressB) into node_real */ + node_db = &port_rcv->hsr->node_db; + node_real = find_node_by_AddrA(node_db, hsr_sp->MacAddressA); + if (!node_real) + /* No frame received from AddrA of this node yet */ + node_real = hsr_add_node(node_db, hsr_sp->MacAddressA, + HSR_SEQNR_START - 1); + if (!node_real) + goto done; /* No mem */ + if (node_real == node_curr) + /* Node has already been merged */ + goto done; + + ether_addr_copy(node_real->MacAddressB, eth_hdr(skb)->h_source); + for (i = 0; i < HSR_PT_PORTS; i++) { + if (!node_curr->time_in_stale[i] && + time_after(node_curr->time_in[i], node_real->time_in[i])) { + node_real->time_in[i] = node_curr->time_in[i]; + node_real->time_in_stale[i] = node_curr->time_in_stale[i]; + } + if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i])) + node_real->seq_out[i] = node_curr->seq_out[i]; + } + node_real->AddrB_port = port_rcv->type; + + list_del_rcu(&node_curr->mac_list); + kfree_rcu(node_curr, rcu_head); + +done: + skb_push(skb, sizeof(struct hsr_ethhdr_sp)); +} + /* 'skb' is a frame meant for this host, that is to be passed to upper layers. * - * If the frame was sent by a node's B interface, replace the sender + * If the frame was sent by a node's B interface, replace the source * address with that node's "official" address (MacAddressA) so that upper * layers recognize where it came from. */ -void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb) +void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb) { - struct ethhdr *ethhdr; - struct node_entry *node; - if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: Mac header not set\n", __func__); return; } - ethhdr = (struct ethhdr *) skb_mac_header(skb); - rcu_read_lock(); - node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source); - if (node) - ether_addr_copy(ethhdr->h_source, node->MacAddressA); - rcu_read_unlock(); + memcpy(ð_hdr(skb)->h_source, node->MacAddressA, ETH_ALEN); } - /* 'skb' is a frame meant for another host. - * 'hsr_dev_idx' is the HSR index of the outgoing device + * 'port' is the outgoing interface * * Substitute the target (dest) MAC address if necessary, so the it matches the * recipient interface MAC address, regardless of whether that is the @@ -264,47 +270,44 @@ void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb) * This is needed to keep the packets flowing through switches that learn on * which "side" the different interfaces are. */ -void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, - enum hsr_dev_idx dev_idx) +void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, + struct hsr_port *port) { - struct node_entry *node; + struct hsr_node *node_dst; - rcu_read_lock(); - node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest); - if (node && (node->AddrB_if == dev_idx)) - ether_addr_copy(ethhdr->h_dest, node->MacAddressB); - rcu_read_unlock(); -} + if (!skb_mac_header_was_set(skb)) { + WARN_ONCE(1, "%s: Mac header not set\n", __func__); + return; + } + if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest)) + return; -/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, - * false otherwise. - */ -static bool seq_nr_after(u16 a, u16 b) -{ - /* Remove inconsistency where - * seq_nr_after(a, b) == seq_nr_before(a, b) - */ - if ((int) b - a == 32768) - return false; + node_dst = find_node_by_AddrA(&port->hsr->node_db, eth_hdr(skb)->h_dest); + if (!node_dst) { + WARN_ONCE(1, "%s: Unknown node\n", __func__); + return; + } + if (port->type != node_dst->AddrB_port) + return; - return (((s16) (b - a)) < 0); + ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->MacAddressB); } -#define seq_nr_before(a, b) seq_nr_after((b), (a)) -#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b))) -#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) -void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx) +void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, + u16 sequence_nr) { - if ((dev_idx < 0) || (dev_idx >= HSR_MAX_SLAVE)) { - WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); + /* Don't register incoming frames without a valid sequence number. This + * ensures entries of restarted nodes gets pruned so that they can + * re-register and resume communications. + */ + if (seq_nr_before(sequence_nr, node->seq_out[port->type])) return; - } - node->time_in[dev_idx] = jiffies; - node->time_in_stale[dev_idx] = false; -} + node->time_in[port->type] = jiffies; + node->time_in_stale[port->type] = false; +} /* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid * ethhdr->h_source address and skb->mac_header set. @@ -314,102 +317,87 @@ void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx) * 0 otherwise, or * negative error code on error */ -int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx, - struct sk_buff *skb) +int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, + u16 sequence_nr) { - struct hsr_ethhdr *hsr_ethhdr; - u16 sequence_nr; - - if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) { - WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); - return -EINVAL; - } - if (!skb_mac_header_was_set(skb)) { - WARN_ONCE(1, "%s: Mac header not set\n", __func__); - return -EINVAL; - } - hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); - - sequence_nr = ntohs(hsr_ethhdr->hsr_tag.sequence_nr); - if (seq_nr_before_or_eq(sequence_nr, node->seq_out[dev_idx])) + if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type])) return 1; - node->seq_out[dev_idx] = sequence_nr; + node->seq_out[port->type] = sequence_nr; return 0; } - -static bool is_late(struct node_entry *node, enum hsr_dev_idx dev_idx) +static struct hsr_port *get_late_port(struct hsr_priv *hsr, + struct hsr_node *node) { - enum hsr_dev_idx other; - - if (node->time_in_stale[dev_idx]) - return true; - - if (dev_idx == HSR_DEV_SLAVE_A) - other = HSR_DEV_SLAVE_B; - else - other = HSR_DEV_SLAVE_A; - - if (node->time_in_stale[other]) - return false; + if (node->time_in_stale[HSR_PT_SLAVE_A]) + return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (node->time_in_stale[HSR_PT_SLAVE_B]) + return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + + if (time_after(node->time_in[HSR_PT_SLAVE_B], + node->time_in[HSR_PT_SLAVE_A] + + msecs_to_jiffies(MAX_SLAVE_DIFF))) + return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (time_after(node->time_in[HSR_PT_SLAVE_A], + node->time_in[HSR_PT_SLAVE_B] + + msecs_to_jiffies(MAX_SLAVE_DIFF))) + return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); - if (time_after(node->time_in[other], node->time_in[dev_idx] + - msecs_to_jiffies(MAX_SLAVE_DIFF))) - return true; - - return false; + return NULL; } /* Remove stale sequence_nr records. Called by timer every * HSR_LIFE_CHECK_INTERVAL (two seconds or so). */ -void hsr_prune_nodes(struct hsr_priv *hsr_priv) +void hsr_prune_nodes(unsigned long data) { - struct node_entry *node; + struct hsr_priv *hsr; + struct hsr_node *node; + struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; + hsr = (struct hsr_priv *) data; + rcu_read_lock(); - list_for_each_entry_rcu(node, &hsr_priv->node_db, mac_list) { + list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { /* Shorthand */ - time_a = node->time_in[HSR_DEV_SLAVE_A]; - time_b = node->time_in[HSR_DEV_SLAVE_B]; + time_a = node->time_in[HSR_PT_SLAVE_A]; + time_b = node->time_in[HSR_PT_SLAVE_B]; /* Check for timestamps old enough to risk wrap-around */ if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2)) - node->time_in_stale[HSR_DEV_SLAVE_A] = true; + node->time_in_stale[HSR_PT_SLAVE_A] = true; if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2)) - node->time_in_stale[HSR_DEV_SLAVE_B] = true; + node->time_in_stale[HSR_PT_SLAVE_B] = true; /* Get age of newest frame from node. * At least one time_in is OK here; nodes get pruned long * before both time_ins can get stale */ timestamp = time_a; - if (node->time_in_stale[HSR_DEV_SLAVE_A] || - (!node->time_in_stale[HSR_DEV_SLAVE_B] && + if (node->time_in_stale[HSR_PT_SLAVE_A] || + (!node->time_in_stale[HSR_PT_SLAVE_B] && time_after(time_b, time_a))) timestamp = time_b; /* Warn of ring error only as long as we get frames at all */ if (time_is_after_jiffies(timestamp + msecs_to_jiffies(1.5*MAX_SLAVE_DIFF))) { - - if (is_late(node, HSR_DEV_SLAVE_A)) - hsr_nl_ringerror(hsr_priv, node->MacAddressA, - HSR_DEV_SLAVE_A); - else if (is_late(node, HSR_DEV_SLAVE_B)) - hsr_nl_ringerror(hsr_priv, node->MacAddressA, - HSR_DEV_SLAVE_B); + rcu_read_lock(); + port = get_late_port(hsr, node); + if (port != NULL) + hsr_nl_ringerror(hsr, node->MacAddressA, port); + rcu_read_unlock(); } /* Prune old entries */ if (time_is_before_jiffies(timestamp + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { - hsr_nl_nodedown(hsr_priv, node->MacAddressA); + hsr_nl_nodedown(hsr, node->MacAddressA); list_del_rcu(&node->mac_list); /* Note that we need to free this entry later: */ kfree_rcu(node, rcu_head); @@ -419,21 +407,21 @@ void hsr_prune_nodes(struct hsr_priv *hsr_priv) } -void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, +void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) { - struct node_entry *node; + struct hsr_node *node; if (!_pos) { - node = list_first_or_null_rcu(&hsr_priv->node_db, - struct node_entry, mac_list); + node = list_first_or_null_rcu(&hsr->node_db, + struct hsr_node, mac_list); if (node) ether_addr_copy(addr, node->MacAddressA); return node; } node = _pos; - list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) { + list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) { ether_addr_copy(addr, node->MacAddressA); return node; } @@ -442,7 +430,7 @@ void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, } -int hsr_get_node_data(struct hsr_priv *hsr_priv, +int hsr_get_node_data(struct hsr_priv *hsr, const unsigned char *addr, unsigned char addr_b[ETH_ALEN], unsigned int *addr_b_ifindex, @@ -451,12 +439,13 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, int *if2_age, u16 *if2_seq) { - struct node_entry *node; + struct hsr_node *node; + struct hsr_port *port; unsigned long tdiff; rcu_read_lock(); - node = find_node_by_AddrA(&hsr_priv->node_db, addr); + node = find_node_by_AddrA(&hsr->node_db, addr); if (!node) { rcu_read_unlock(); return -ENOENT; /* No such entry */ @@ -464,8 +453,8 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, ether_addr_copy(addr_b, node->MacAddressB); - tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A]; - if (node->time_in_stale[HSR_DEV_SLAVE_A]) + tdiff = jiffies - node->time_in[HSR_PT_SLAVE_A]; + if (node->time_in_stale[HSR_PT_SLAVE_A]) *if1_age = INT_MAX; #if HZ <= MSEC_PER_SEC else if (tdiff > msecs_to_jiffies(INT_MAX)) @@ -474,8 +463,8 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, else *if1_age = jiffies_to_msecs(tdiff); - tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_B]; - if (node->time_in_stale[HSR_DEV_SLAVE_B]) + tdiff = jiffies - node->time_in[HSR_PT_SLAVE_B]; + if (node->time_in_stale[HSR_PT_SLAVE_B]) *if2_age = INT_MAX; #if HZ <= MSEC_PER_SEC else if (tdiff > msecs_to_jiffies(INT_MAX)) @@ -485,13 +474,15 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, *if2_age = jiffies_to_msecs(tdiff); /* Present sequence numbers as if they were incoming on interface */ - *if1_seq = node->seq_out[HSR_DEV_SLAVE_B]; - *if2_seq = node->seq_out[HSR_DEV_SLAVE_A]; + *if1_seq = node->seq_out[HSR_PT_SLAVE_B]; + *if2_seq = node->seq_out[HSR_PT_SLAVE_A]; - if ((node->AddrB_if != HSR_DEV_NONE) && hsr_priv->slave[node->AddrB_if]) - *addr_b_ifindex = hsr_priv->slave[node->AddrB_if]->ifindex; - else + if (node->AddrB_port != HSR_PT_NONE) { + port = hsr_port_get_hsr(hsr, node->AddrB_port); + *addr_b_ifindex = port->dev->ifindex; + } else { *addr_b_ifindex = -1; + } rcu_read_unlock(); diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index e6c4022030ad..438b40f98f5a 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,42 +6,43 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ -#ifndef _HSR_FRAMEREG_H -#define _HSR_FRAMEREG_H +#ifndef __HSR_FRAMEREG_H +#define __HSR_FRAMEREG_H #include "hsr_main.h" -struct node_entry; +struct hsr_node; -struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb); +struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], + u16 seq_out); +struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, + bool is_sup); +void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, + struct hsr_port *port); +bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr); -struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, - struct node_entry *node, - struct sk_buff *skb, - enum hsr_dev_idx dev_idx); +void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb); +void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, + struct hsr_port *port); -void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb); -void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, - enum hsr_dev_idx dev_idx); +void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, + u16 sequence_nr); +int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, + u16 sequence_nr); -void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx); - -int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx, - struct sk_buff *skb); - -void hsr_prune_nodes(struct hsr_priv *hsr_priv); +void hsr_prune_nodes(unsigned long data); int hsr_create_self_node(struct list_head *self_node_db, unsigned char addr_a[ETH_ALEN], unsigned char addr_b[ETH_ALEN]); -void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, +void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]); -int hsr_get_node_data(struct hsr_priv *hsr_priv, +int hsr_get_node_data(struct hsr_priv *hsr, const unsigned char *addr, unsigned char addr_b[ETH_ALEN], unsigned int *addr_b_ifindex, @@ -50,4 +51,4 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, int *if2_age, u16 *if2_seq); -#endif /* _HSR_FRAMEREG_H */ +#endif /* __HSR_FRAMEREG_H */ diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 3fee5218a691..779d28b65417 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,11 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com - * - * In addition to routines for registering and unregistering HSR support, this - * file also contains the receive routine that handles all incoming frames with - * Ethertype (protocol) ETH_P_PRP (HSRv0), and network device event handling. + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ #include <linux/netdevice.h> @@ -21,154 +17,71 @@ #include "hsr_device.h" #include "hsr_netlink.h" #include "hsr_framereg.h" - - -/* List of all registered virtual HSR devices */ -static LIST_HEAD(hsr_list); - -void register_hsr_master(struct hsr_priv *hsr_priv) -{ - list_add_tail_rcu(&hsr_priv->hsr_list, &hsr_list); -} - -void unregister_hsr_master(struct hsr_priv *hsr_priv) -{ - struct hsr_priv *hsr_priv_it; - - list_for_each_entry(hsr_priv_it, &hsr_list, hsr_list) - if (hsr_priv_it == hsr_priv) { - list_del_rcu(&hsr_priv_it->hsr_list); - return; - } -} - -bool is_hsr_slave(struct net_device *dev) -{ - struct hsr_priv *hsr_priv_it; - - list_for_each_entry_rcu(hsr_priv_it, &hsr_list, hsr_list) { - if (dev == hsr_priv_it->slave[0]) - return true; - if (dev == hsr_priv_it->slave[1]) - return true; - } - - return false; -} - - -/* If dev is a HSR slave device, return the virtual master device. Return NULL - * otherwise. - */ -static struct hsr_priv *get_hsr_master(struct net_device *dev) -{ - struct hsr_priv *hsr_priv; - - rcu_read_lock(); - list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list) - if ((dev == hsr_priv->slave[0]) || - (dev == hsr_priv->slave[1])) { - rcu_read_unlock(); - return hsr_priv; - } - - rcu_read_unlock(); - return NULL; -} - - -/* If dev is a HSR slave device, return the other slave device. Return NULL - * otherwise. - */ -static struct net_device *get_other_slave(struct hsr_priv *hsr_priv, - struct net_device *dev) -{ - if (dev == hsr_priv->slave[0]) - return hsr_priv->slave[1]; - if (dev == hsr_priv->slave[1]) - return hsr_priv->slave[0]; - - return NULL; -} +#include "hsr_slave.h" static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, void *ptr) { - struct net_device *slave, *other_slave; - struct hsr_priv *hsr_priv; - int old_operstate; + struct net_device *dev; + struct hsr_port *port, *master; + struct hsr_priv *hsr; int mtu_max; int res; - struct net_device *dev; dev = netdev_notifier_info_to_dev(ptr); - - hsr_priv = get_hsr_master(dev); - if (hsr_priv) { - /* dev is a slave device */ - slave = dev; - other_slave = get_other_slave(hsr_priv, slave); - } else { + port = hsr_port_get_rtnl(dev); + if (port == NULL) { if (!is_hsr_master(dev)) - return NOTIFY_DONE; - hsr_priv = netdev_priv(dev); - slave = hsr_priv->slave[0]; - other_slave = hsr_priv->slave[1]; + return NOTIFY_DONE; /* Not an HSR device */ + hsr = netdev_priv(dev); + port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + } else { + hsr = port->hsr; } switch (event) { case NETDEV_UP: /* Administrative state DOWN */ case NETDEV_DOWN: /* Administrative state UP */ case NETDEV_CHANGE: /* Link (carrier) state changes */ - old_operstate = hsr_priv->dev->operstate; - hsr_set_carrier(hsr_priv->dev, slave, other_slave); - /* netif_stacked_transfer_operstate() cannot be used here since - * it doesn't set IF_OPER_LOWERLAYERDOWN (?) - */ - hsr_set_operstate(hsr_priv->dev, slave, other_slave); - hsr_check_announce(hsr_priv->dev, old_operstate); + hsr_check_carrier_and_operstate(hsr); break; case NETDEV_CHANGEADDR: - - /* This should not happen since there's no ndo_set_mac_address() - * for HSR devices - i.e. not supported. - */ - if (dev == hsr_priv->dev) + if (port->type == HSR_PT_MASTER) { + /* This should not happen since there's no + * ndo_set_mac_address() for HSR devices - i.e. not + * supported. + */ break; + } - if (dev == hsr_priv->slave[0]) - ether_addr_copy(hsr_priv->dev->dev_addr, - hsr_priv->slave[0]->dev_addr); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + + if (port->type == HSR_PT_SLAVE_A) { + ether_addr_copy(master->dev->dev_addr, dev->dev_addr); + call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev); + } /* Make sure we recognize frames from ourselves in hsr_rcv() */ - res = hsr_create_self_node(&hsr_priv->self_node_db, - hsr_priv->dev->dev_addr, - hsr_priv->slave[1] ? - hsr_priv->slave[1]->dev_addr : - hsr_priv->dev->dev_addr); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + res = hsr_create_self_node(&hsr->self_node_db, + master->dev->dev_addr, + port ? + port->dev->dev_addr : + master->dev->dev_addr); if (res) - netdev_warn(hsr_priv->dev, + netdev_warn(master->dev, "Could not update HSR node address.\n"); - - if (dev == hsr_priv->slave[0]) - call_netdevice_notifiers(NETDEV_CHANGEADDR, hsr_priv->dev); break; case NETDEV_CHANGEMTU: - if (dev == hsr_priv->dev) + if (port->type == HSR_PT_MASTER) break; /* Handled in ndo_change_mtu() */ - mtu_max = hsr_get_max_mtu(hsr_priv); - if (hsr_priv->dev->mtu > mtu_max) - dev_set_mtu(hsr_priv->dev, mtu_max); + mtu_max = hsr_get_max_mtu(port->hsr); + master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER); + master->dev->mtu = mtu_max; break; case NETDEV_UNREGISTER: - if (dev == hsr_priv->slave[0]) - hsr_priv->slave[0] = NULL; - if (dev == hsr_priv->slave[1]) - hsr_priv->slave[1] = NULL; - - /* There should really be a way to set a new slave device... */ - + hsr_del_port(port); break; case NETDEV_PRE_TYPE_CHANGE: /* HSR works only on Ethernet devices. Refuse slave to change @@ -181,255 +94,16 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, } -static struct timer_list prune_timer; - -static void prune_nodes_all(unsigned long data) -{ - struct hsr_priv *hsr_priv; - - rcu_read_lock(); - list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list) - hsr_prune_nodes(hsr_priv); - rcu_read_unlock(); - - prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); - add_timer(&prune_timer); -} - - -static struct sk_buff *hsr_pull_tag(struct sk_buff *skb) +struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt) { - struct hsr_tag *hsr_tag; - struct sk_buff *skb2; - - skb2 = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(!skb2)) - goto err_free; - skb = skb2; - - if (unlikely(!pskb_may_pull(skb, HSR_TAGLEN))) - goto err_free; + struct hsr_port *port; - hsr_tag = (struct hsr_tag *) skb->data; - skb->protocol = hsr_tag->encap_proto; - skb_pull(skb, HSR_TAGLEN); - - return skb; - -err_free: - kfree_skb(skb); + hsr_for_each_port(hsr, port) + if (port->type == pt) + return port; return NULL; } - -/* The uses I can see for these HSR supervision frames are: - * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = - * 22") to reset any sequence_nr counters belonging to that node. Useful if - * the other node's counter has been reset for some reason. - * -- - * Or not - resetting the counter and bridging the frame would create a - * loop, unfortunately. - * - * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck - * frame is received from a particular node, we know something is wrong. - * We just register these (as with normal frames) and throw them away. - * - * 3) Allow different MAC addresses for the two slave interfaces, using the - * MacAddressA field. - */ -static bool is_supervision_frame(struct hsr_priv *hsr_priv, struct sk_buff *skb) -{ - struct hsr_sup_tag *hsr_stag; - - if (!ether_addr_equal(eth_hdr(skb)->h_dest, - hsr_priv->sup_multicast_addr)) - return false; - - hsr_stag = (struct hsr_sup_tag *) skb->data; - if (get_hsr_stag_path(hsr_stag) != 0x0f) - return false; - if ((hsr_stag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) && - (hsr_stag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) - return false; - if (hsr_stag->HSR_TLV_Length != 12) - return false; - - return true; -} - - -/* Implementation somewhat according to IEC-62439-3, p. 43 - */ -static int hsr_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct hsr_priv *hsr_priv; - struct net_device *other_slave; - struct node_entry *node; - bool deliver_to_self; - struct sk_buff *skb_deliver; - enum hsr_dev_idx dev_in_idx, dev_other_idx; - bool dup_out; - int ret; - - hsr_priv = get_hsr_master(dev); - - if (!hsr_priv) { - /* Non-HSR-slave device 'dev' is connected to a HSR network */ - kfree_skb(skb); - dev->stats.rx_errors++; - return NET_RX_SUCCESS; - } - - if (dev == hsr_priv->slave[0]) { - dev_in_idx = HSR_DEV_SLAVE_A; - dev_other_idx = HSR_DEV_SLAVE_B; - } else { - dev_in_idx = HSR_DEV_SLAVE_B; - dev_other_idx = HSR_DEV_SLAVE_A; - } - - node = hsr_find_node(&hsr_priv->self_node_db, skb); - if (node) { - /* Always kill frames sent by ourselves */ - kfree_skb(skb); - return NET_RX_SUCCESS; - } - - /* Is this frame a candidate for local reception? */ - deliver_to_self = false; - if ((skb->pkt_type == PACKET_HOST) || - (skb->pkt_type == PACKET_MULTICAST) || - (skb->pkt_type == PACKET_BROADCAST)) - deliver_to_self = true; - else if (ether_addr_equal(eth_hdr(skb)->h_dest, - hsr_priv->dev->dev_addr)) { - skb->pkt_type = PACKET_HOST; - deliver_to_self = true; - } - - - rcu_read_lock(); /* node_db */ - node = hsr_find_node(&hsr_priv->node_db, skb); - - if (is_supervision_frame(hsr_priv, skb)) { - skb_pull(skb, sizeof(struct hsr_sup_tag)); - node = hsr_merge_node(hsr_priv, node, skb, dev_in_idx); - if (!node) { - rcu_read_unlock(); /* node_db */ - kfree_skb(skb); - hsr_priv->dev->stats.rx_dropped++; - return NET_RX_DROP; - } - skb_push(skb, sizeof(struct hsr_sup_tag)); - deliver_to_self = false; - } - - if (!node) { - /* Source node unknown; this might be a HSR frame from - * another net (different multicast address). Ignore it. - */ - rcu_read_unlock(); /* node_db */ - kfree_skb(skb); - return NET_RX_SUCCESS; - } - - /* Register ALL incoming frames as outgoing through the other interface. - * This allows us to register frames as incoming only if they are valid - * for the receiving interface, without using a specific counter for - * incoming frames. - */ - dup_out = hsr_register_frame_out(node, dev_other_idx, skb); - if (!dup_out) - hsr_register_frame_in(node, dev_in_idx); - - /* Forward this frame? */ - if (!dup_out && (skb->pkt_type != PACKET_HOST)) - other_slave = get_other_slave(hsr_priv, dev); - else - other_slave = NULL; - - if (hsr_register_frame_out(node, HSR_DEV_MASTER, skb)) - deliver_to_self = false; - - rcu_read_unlock(); /* node_db */ - - if (!deliver_to_self && !other_slave) { - kfree_skb(skb); - /* Circulated frame; silently remove it. */ - return NET_RX_SUCCESS; - } - - skb_deliver = skb; - if (deliver_to_self && other_slave) { - /* skb_clone() is not enough since we will strip the hsr tag - * and do address substitution below - */ - skb_deliver = pskb_copy(skb, GFP_ATOMIC); - if (!skb_deliver) { - deliver_to_self = false; - hsr_priv->dev->stats.rx_dropped++; - } - } - - if (deliver_to_self) { - bool multicast_frame; - - skb_deliver = hsr_pull_tag(skb_deliver); - if (!skb_deliver) { - hsr_priv->dev->stats.rx_dropped++; - goto forward; - } -#if !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - /* Move everything in the header that is after the HSR tag, - * to work around alignment problems caused by the 6-byte HSR - * tag. In practice, this removes/overwrites the HSR tag in - * the header and restores a "standard" packet. - */ - memmove(skb_deliver->data - HSR_TAGLEN, skb_deliver->data, - skb_headlen(skb_deliver)); - - /* Adjust skb members so they correspond with the move above. - * This cannot possibly underflow skb->data since hsr_pull_tag() - * above succeeded. - * At this point in the protocol stack, the transport and - * network headers have not been set yet, and we haven't touched - * the mac header nor the head. So we only need to adjust data - * and tail: - */ - skb_deliver->data -= HSR_TAGLEN; - skb_deliver->tail -= HSR_TAGLEN; -#endif - skb_deliver->dev = hsr_priv->dev; - hsr_addr_subst_source(hsr_priv, skb_deliver); - multicast_frame = (skb_deliver->pkt_type == PACKET_MULTICAST); - ret = netif_rx(skb_deliver); - if (ret == NET_RX_DROP) { - hsr_priv->dev->stats.rx_dropped++; - } else { - hsr_priv->dev->stats.rx_packets++; - hsr_priv->dev->stats.rx_bytes += skb->len; - if (multicast_frame) - hsr_priv->dev->stats.multicast++; - } - } - -forward: - if (other_slave) { - skb_push(skb, ETH_HLEN); - skb->dev = other_slave; - dev_queue_xmit(skb); - } - - return NET_RX_SUCCESS; -} - - -static struct packet_type hsr_pt __read_mostly = { - .type = htons(ETH_P_PRP), - .func = hsr_rcv, -}; - static struct notifier_block hsr_nb = { .notifier_call = hsr_netdev_notify, /* Slave event notifications */ }; @@ -439,18 +113,9 @@ static int __init hsr_init(void) { int res; - BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_TAGLEN); - - dev_add_pack(&hsr_pt); - - init_timer(&prune_timer); - prune_timer.function = prune_nodes_all; - prune_timer.data = 0; - prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); - add_timer(&prune_timer); + BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_HLEN); register_netdevice_notifier(&hsr_nb); - res = hsr_netlink_init(); return res; @@ -459,9 +124,7 @@ static int __init hsr_init(void) static void __exit hsr_exit(void) { unregister_netdevice_notifier(&hsr_nb); - del_timer_sync(&prune_timer); hsr_netlink_exit(); - dev_remove_pack(&hsr_pt); } module_init(hsr_init); diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 56fe060c0ab1..5a9c69962ded 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,11 +6,11 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ -#ifndef _HSR_PRIVATE_H -#define _HSR_PRIVATE_H +#ifndef __HSR_PRIVATE_H +#define __HSR_PRIVATE_H #include <linux/netdevice.h> #include <linux/list.h> @@ -29,6 +29,7 @@ * each node differ before we notify of communication problem? */ #define MAX_SLAVE_DIFF 3000 /* ms */ +#define HSR_SEQNR_START (USHRT_MAX - 1024) /* How often shall we check for broken ring and remove node entries older than @@ -46,16 +47,16 @@ * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr, * encapsulated protocol } instead. + * + * Field names as defined in the IEC:2010 standard for HSR. */ -#define HSR_TAGLEN 6 - -/* Field names below as defined in the IEC:2010 standard for HSR. */ struct hsr_tag { __be16 path_and_LSDU_size; __be16 sequence_nr; __be16 encap_proto; } __packed; +#define HSR_HLEN 6 /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or @@ -136,31 +137,47 @@ struct hsr_ethhdr_sp { } __packed; -enum hsr_dev_idx { - HSR_DEV_NONE = -1, - HSR_DEV_SLAVE_A = 0, - HSR_DEV_SLAVE_B, - HSR_DEV_MASTER, +enum hsr_port_type { + HSR_PT_NONE = 0, /* Must be 0, used by framereg */ + HSR_PT_SLAVE_A, + HSR_PT_SLAVE_B, + HSR_PT_INTERLINK, + HSR_PT_MASTER, + HSR_PT_PORTS, /* This must be the last item in the enum */ +}; + +struct hsr_port { + struct list_head port_list; + struct net_device *dev; + struct hsr_priv *hsr; + enum hsr_port_type type; }; -#define HSR_MAX_SLAVE (HSR_DEV_SLAVE_B + 1) -#define HSR_MAX_DEV (HSR_DEV_MASTER + 1) struct hsr_priv { - struct list_head hsr_list; /* List of hsr devices */ struct rcu_head rcu_head; - struct net_device *dev; - struct net_device *slave[HSR_MAX_SLAVE]; - struct list_head node_db; /* Other HSR nodes */ + struct list_head ports; + struct list_head node_db; /* Known HSR nodes */ struct list_head self_node_db; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ + struct timer_list prune_timer; int announce_count; u16 sequence_nr; spinlock_t seqnr_lock; /* locking for sequence_nr */ unsigned char sup_multicast_addr[ETH_ALEN]; }; -void register_hsr_master(struct hsr_priv *hsr_priv); -void unregister_hsr_master(struct hsr_priv *hsr_priv); -bool is_hsr_slave(struct net_device *dev); +#define hsr_for_each_port(hsr, port) \ + list_for_each_entry_rcu((port), &(hsr)->ports, port_list) + +struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt); + +/* Caller must ensure skb is a valid HSR frame */ +static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb) +{ + struct hsr_ethhdr *hsr_ethhdr; + + hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); + return ntohs(hsr_ethhdr->hsr_tag.sequence_nr); +} -#endif /* _HSR_PRIVATE_H */ +#endif /* __HSR_PRIVATE_H */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 01a5261ac7a5..a2c7e4c0ac1e 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * Routines for handling Netlink messages for HSR. */ @@ -37,13 +37,17 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, struct net_device *link[2]; unsigned char multicast_spec; + if (!data) { + netdev_info(dev, "HSR: No slave devices specified\n"); + return -EINVAL; + } if (!data[IFLA_HSR_SLAVE1]) { - netdev_info(dev, "IFLA_HSR_SLAVE1 missing!\n"); + netdev_info(dev, "HSR: Slave1 device not specified\n"); return -EINVAL; } link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1])); if (!data[IFLA_HSR_SLAVE2]) { - netdev_info(dev, "IFLA_HSR_SLAVE2 missing!\n"); + netdev_info(dev, "HSR: Slave2 device not specified\n"); return -EINVAL; } link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2])); @@ -63,21 +67,33 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *port; + int res; - hsr_priv = netdev_priv(dev); + hsr = netdev_priv(dev); - if (hsr_priv->slave[0]) - if (nla_put_u32(skb, IFLA_HSR_SLAVE1, hsr_priv->slave[0]->ifindex)) - goto nla_put_failure; + res = 0; - if (hsr_priv->slave[1]) - if (nla_put_u32(skb, IFLA_HSR_SLAVE2, hsr_priv->slave[1]->ifindex)) - goto nla_put_failure; + rcu_read_lock(); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (port) + res = nla_put_u32(skb, IFLA_HSR_SLAVE1, port->dev->ifindex); + rcu_read_unlock(); + if (res) + goto nla_put_failure; + + rcu_read_lock(); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + if (port) + res = nla_put_u32(skb, IFLA_HSR_SLAVE2, port->dev->ifindex); + rcu_read_unlock(); + if (res) + goto nla_put_failure; if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, - hsr_priv->sup_multicast_addr) || - nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr_priv->sequence_nr)) + hsr->sup_multicast_addr) || + nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr)) goto nla_put_failure; return 0; @@ -128,13 +144,13 @@ static const struct genl_multicast_group hsr_mcgrps[] = { * over one of the slave interfaces. This would indicate an open network ring * (i.e. a link has failed somewhere). */ -void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], - enum hsr_dev_idx dev_idx) +void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN], + struct hsr_port *port) { struct sk_buff *skb; void *msg_head; + struct hsr_port *master; int res; - int ifindex; skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) @@ -148,11 +164,7 @@ void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], if (res < 0) goto nla_put_failure; - if (hsr_priv->slave[dev_idx]) - ifindex = hsr_priv->slave[dev_idx]->ifindex; - else - ifindex = -1; - res = nla_put_u32(skb, HSR_A_IFINDEX, ifindex); + res = nla_put_u32(skb, HSR_A_IFINDEX, port->dev->ifindex); if (res < 0) goto nla_put_failure; @@ -165,16 +177,20 @@ nla_put_failure: kfree_skb(skb); fail: - netdev_warn(hsr_priv->dev, "Could not send HSR ring error message\n"); + rcu_read_lock(); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + netdev_warn(master->dev, "Could not send HSR ring error message\n"); + rcu_read_unlock(); } /* This is called when we haven't heard from the node with MAC address addr for * some time (just before the node is removed from the node table/list). */ -void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]) +void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]) { struct sk_buff *skb; void *msg_head; + struct hsr_port *master; int res; skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); @@ -199,7 +215,10 @@ nla_put_failure: kfree_skb(skb); fail: - netdev_warn(hsr_priv->dev, "Could not send HSR node down\n"); + rcu_read_lock(); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + netdev_warn(master->dev, "Could not send HSR node down\n"); + rcu_read_unlock(); } @@ -220,7 +239,8 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) /* For sending */ struct sk_buff *skb_out; void *msg_head; - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *port; unsigned char hsr_node_addr_b[ETH_ALEN]; int hsr_node_if1_age; u16 hsr_node_if1_seq; @@ -267,8 +287,8 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) if (res < 0) goto nla_put_failure; - hsr_priv = netdev_priv(hsr_dev); - res = hsr_get_node_data(hsr_priv, + hsr = netdev_priv(hsr_dev); + res = hsr_get_node_data(hsr, (unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]), hsr_node_addr_b, &addr_b_ifindex, @@ -301,9 +321,12 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq); if (res < 0) goto nla_put_failure; - if (hsr_priv->slave[0]) + rcu_read_lock(); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (port) res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX, - hsr_priv->slave[0]->ifindex); + port->dev->ifindex); + rcu_read_unlock(); if (res < 0) goto nla_put_failure; @@ -313,9 +336,14 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq); if (res < 0) goto nla_put_failure; - if (hsr_priv->slave[1]) + rcu_read_lock(); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + if (port) res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX, - hsr_priv->slave[1]->ifindex); + port->dev->ifindex); + rcu_read_unlock(); + if (res < 0) + goto nla_put_failure; genlmsg_end(skb_out, msg_head); genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); @@ -334,7 +362,7 @@ fail: return res; } -/* Get a list of MacAddressA of all nodes known to this node (other than self). +/* Get a list of MacAddressA of all nodes known to this node (including self). */ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) { @@ -345,7 +373,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) /* For sending */ struct sk_buff *skb_out; void *msg_head; - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; void *pos; unsigned char addr[ETH_ALEN]; int res; @@ -385,17 +413,17 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) if (res < 0) goto nla_put_failure; - hsr_priv = netdev_priv(hsr_dev); + hsr = netdev_priv(hsr_dev); rcu_read_lock(); - pos = hsr_get_next_node(hsr_priv, NULL, addr); + pos = hsr_get_next_node(hsr, NULL, addr); while (pos) { res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); if (res < 0) { rcu_read_unlock(); goto nla_put_failure; } - pos = hsr_get_next_node(hsr_priv, pos, addr); + pos = hsr_get_next_node(hsr, pos, addr); } rcu_read_unlock(); diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h index d4579dcc3c7d..3f6b95b5b6b8 100644 --- a/net/hsr/hsr_netlink.h +++ b/net/hsr/hsr_netlink.h @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ #ifndef __HSR_NETLINK_H @@ -17,13 +17,14 @@ #include <uapi/linux/hsr_netlink.h> struct hsr_priv; +struct hsr_port; int __init hsr_netlink_init(void); void __exit hsr_netlink_exit(void); -void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], - int dev_idx); -void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]); +void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN], + struct hsr_port *port); +void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]); void hsr_nl_framedrop(int dropcount, int dev_idx); void hsr_nl_linkdown(int dev_idx); diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c new file mode 100644 index 000000000000..a348dcbcd683 --- /dev/null +++ b/net/hsr/hsr_slave.c @@ -0,0 +1,196 @@ +/* Copyright 2011-2014 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + */ + +#include "hsr_slave.h" +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include "hsr_main.h" +#include "hsr_device.h" +#include "hsr_forward.h" +#include "hsr_framereg.h" + + +static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct hsr_port *port; + + if (!skb_mac_header_was_set(skb)) { + WARN_ONCE(1, "%s: skb invalid", __func__); + return RX_HANDLER_PASS; + } + + rcu_read_lock(); /* hsr->node_db, hsr->ports */ + port = hsr_port_get_rcu(skb->dev); + + if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { + /* Directly kill frames sent by ourselves */ + kfree_skb(skb); + goto finish_consume; + } + + if (eth_hdr(skb)->h_proto != htons(ETH_P_PRP)) + goto finish_pass; + + skb_push(skb, ETH_HLEN); + + hsr_forward_skb(skb, port); + +finish_consume: + rcu_read_unlock(); /* hsr->node_db, hsr->ports */ + return RX_HANDLER_CONSUMED; + +finish_pass: + rcu_read_unlock(); /* hsr->node_db, hsr->ports */ + return RX_HANDLER_PASS; +} + +bool hsr_port_exists(const struct net_device *dev) +{ + return rcu_access_pointer(dev->rx_handler) == hsr_handle_frame; +} + + +static int hsr_check_dev_ok(struct net_device *dev) +{ + /* Don't allow HSR on non-ethernet like devices */ + if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) || + (dev->addr_len != ETH_ALEN)) { + netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n"); + return -EINVAL; + } + + /* Don't allow enslaving hsr devices */ + if (is_hsr_master(dev)) { + netdev_info(dev, "Cannot create trees of HSR devices.\n"); + return -EINVAL; + } + + if (hsr_port_exists(dev)) { + netdev_info(dev, "This device is already a HSR slave.\n"); + return -EINVAL; + } + + if (dev->priv_flags & IFF_802_1Q_VLAN) { + netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n"); + return -EINVAL; + } + + if (dev->priv_flags & IFF_DONT_BRIDGE) { + netdev_info(dev, "This device does not support bridging.\n"); + return -EOPNOTSUPP; + } + + /* HSR over bonded devices has not been tested, but I'm not sure it + * won't work... + */ + + return 0; +} + + +/* Setup device to be added to the HSR bridge. */ +static int hsr_portdev_setup(struct net_device *dev, struct hsr_port *port) +{ + int res; + + dev_hold(dev); + res = dev_set_promiscuity(dev, 1); + if (res) + goto fail_promiscuity; + + /* FIXME: + * What does net device "adjacency" mean? Should we do + * res = netdev_master_upper_dev_link(port->dev, port->hsr->dev); ? + */ + + res = netdev_rx_handler_register(dev, hsr_handle_frame, port); + if (res) + goto fail_rx_handler; + dev_disable_lro(dev); + + return 0; + +fail_rx_handler: + dev_set_promiscuity(dev, -1); +fail_promiscuity: + dev_put(dev); + + return res; +} + +int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, + enum hsr_port_type type) +{ + struct hsr_port *port, *master; + int res; + + if (type != HSR_PT_MASTER) { + res = hsr_check_dev_ok(dev); + if (res) + return res; + } + + port = hsr_port_get_hsr(hsr, type); + if (port != NULL) + return -EBUSY; /* This port already exists */ + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (port == NULL) + return -ENOMEM; + + if (type != HSR_PT_MASTER) { + res = hsr_portdev_setup(dev, port); + if (res) + goto fail_dev_setup; + } + + port->hsr = hsr; + port->dev = dev; + port->type = type; + + list_add_tail_rcu(&port->port_list, &hsr->ports); + synchronize_rcu(); + + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + + return 0; + +fail_dev_setup: + kfree(port); + return res; +} + +void hsr_del_port(struct hsr_port *port) +{ + struct hsr_priv *hsr; + struct hsr_port *master; + + hsr = port->hsr; + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + list_del_rcu(&port->port_list); + + if (port != master) { + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + netdev_rx_handler_unregister(port->dev); + dev_set_promiscuity(port->dev, -1); + } + + /* FIXME? + * netdev_upper_dev_unlink(port->dev, port->hsr->dev); + */ + + synchronize_rcu(); + dev_put(port->dev); +} diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h new file mode 100644 index 000000000000..3ccfbf71c92e --- /dev/null +++ b/net/hsr/hsr_slave.h @@ -0,0 +1,38 @@ +/* Copyright 2011-2014 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + */ + +#ifndef __HSR_SLAVE_H +#define __HSR_SLAVE_H + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include "hsr_main.h" + +int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, + enum hsr_port_type pt); +void hsr_del_port(struct hsr_port *port); +bool hsr_port_exists(const struct net_device *dev); + +static inline struct hsr_port *hsr_port_get_rtnl(const struct net_device *dev) +{ + ASSERT_RTNL(); + return hsr_port_exists(dev) ? + rtnl_dereference(dev->rx_handler_data) : NULL; +} + +static inline struct hsr_port *hsr_port_get_rcu(const struct net_device *dev) +{ + return hsr_port_exists(dev) ? + rcu_dereference(dev->rx_handler_data) : NULL; +} + +#endif /* __HSR_SLAVE_H */ diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c index 0f5a69ed746d..016b77ee88f0 100644 --- a/net/ieee802154/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan_rtnl.c @@ -80,18 +80,19 @@ lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) static inline void lowpan_address_flip(u8 *src, u8 *dest) { int i; + for (i = 0; i < IEEE802154_ADDR_LEN; i++) (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i]; } -static int lowpan_header_create(struct sk_buff *skb, - struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) +static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) { const u8 *saddr = _saddr; const u8 *daddr = _daddr; struct ieee802154_addr sa, da; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); /* TODO: * if this package isn't ipv6 one, where should it be routed? @@ -115,8 +116,7 @@ static int lowpan_header_create(struct sk_buff *skb, * from MAC subif of the 'dev' and 'real_dev' network devices, but * this isn't implemented in mainline yet, so currently we assign 0xff */ - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; - mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + cb->type = IEEE802154_FC_TYPE_DATA; /* prepare wpan address data */ sa.mode = IEEE802154_ADDR_LONG; @@ -135,17 +135,16 @@ static int lowpan_header_create(struct sk_buff *skb, } else { da.mode = IEEE802154_ADDR_LONG; da.extended_addr = ieee802154_devaddr_from_raw(daddr); - - /* request acknowledgment */ - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; } + cb->ackreq = !lowpan_is_addr_broadcast(daddr); + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, type, (void *)&da, (void *)&sa, 0); } static int lowpan_give_skb_to_devices(struct sk_buff *skb, - struct net_device *dev) + struct net_device *dev) { struct lowpan_dev_record *entry; struct sk_buff *skb_cp; @@ -221,162 +220,176 @@ static int lowpan_set_address(struct net_device *dev, void *p) return 0; } -static int -lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, - int mlen, int plen, int offset, int type) +static struct sk_buff* +lowpan_alloc_frag(struct sk_buff *skb, int size, + const struct ieee802154_hdr *master_hdr) { + struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; struct sk_buff *frag; - int hlen; - - hlen = (type == LOWPAN_DISPATCH_FRAG1) ? - LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE; - - raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); + int rc; + + frag = alloc_skb(real_dev->hard_header_len + + real_dev->needed_tailroom + size, + GFP_ATOMIC); + + if (likely(frag)) { + frag->dev = real_dev; + frag->priority = skb->priority; + skb_reserve(frag, real_dev->hard_header_len); + skb_reset_network_header(frag); + *mac_cb(frag) = *mac_cb(skb); + + rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, + &master_hdr->source, size); + if (rc < 0) { + kfree_skb(frag); + return ERR_PTR(-rc); + } + } else { + frag = ERR_PTR(ENOMEM); + } - frag = netdev_alloc_skb(skb->dev, - hlen + mlen + plen + IEEE802154_MFR_SIZE); - if (!frag) - return -ENOMEM; + return frag; +} - frag->priority = skb->priority; +static int +lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, + u8 *frag_hdr, int frag_hdrlen, + int offset, int len) +{ + struct sk_buff *frag; - /* copy header, MFR and payload */ - skb_put(frag, mlen); - skb_copy_to_linear_data(frag, skb_mac_header(skb), mlen); + raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); - skb_put(frag, hlen); - skb_copy_to_linear_data_offset(frag, mlen, head, hlen); + frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); + if (IS_ERR(frag)) + return -PTR_ERR(frag); - skb_put(frag, plen); - skb_copy_to_linear_data_offset(frag, mlen + hlen, - skb_network_header(skb) + offset, plen); + memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); + memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); - raw_dump_table(__func__, " raw fragment dump", frag->data, frag->len); + raw_dump_table(__func__, " fragment dump", frag->data, frag->len); return dev_queue_xmit(frag); } static int -lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, + const struct ieee802154_hdr *wpan_hdr) { - int err; - u16 dgram_offset, dgram_size, payload_length, header_length, - lowpan_size, frag_plen, offset; - __be16 tag; - u8 head[5]; - - header_length = skb->mac_len; - payload_length = skb->len - header_length; - tag = lowpan_dev_info(dev)->fragment_tag++; - lowpan_size = skb_network_header_len(skb); + u16 dgram_size, dgram_offset; + __be16 frag_tag; + u8 frag_hdr[5]; + int frag_cap, frag_len, payload_cap, rc; + int skb_unprocessed, skb_offset; + dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - header_length; + skb->mac_len; + frag_tag = lowpan_dev_info(dev)->fragment_tag++; - /* first fragment header */ - head[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x7); - head[1] = dgram_size & 0xff; - memcpy(head + 2, &tag, sizeof(tag)); + frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); + frag_hdr[1] = dgram_size & 0xff; + memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); - /* calc the nearest payload length(divided to 8) for first fragment - * which fits into a IEEE802154_MTU - */ - frag_plen = round_down(IEEE802154_MTU - header_length - - LOWPAN_FRAG1_HEAD_SIZE - lowpan_size - - IEEE802154_MFR_SIZE, 8); - - err = lowpan_fragment_xmit(skb, head, header_length, - frag_plen + lowpan_size, 0, - LOWPAN_DISPATCH_FRAG1); - if (err) { - pr_debug("%s unable to send FRAG1 packet (tag: %d)", - __func__, tag); - goto exit; - } + payload_cap = ieee802154_max_payload(wpan_hdr); - offset = lowpan_size + frag_plen; - dgram_offset += frag_plen; + frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - + skb_network_header_len(skb), 8); - /* next fragment header */ - head[0] &= ~LOWPAN_DISPATCH_FRAG1; - head[0] |= LOWPAN_DISPATCH_FRAGN; + skb_offset = skb_network_header_len(skb); + skb_unprocessed = skb->len - skb->mac_len - skb_offset; - frag_plen = round_down(IEEE802154_MTU - header_length - - LOWPAN_FRAGN_HEAD_SIZE - IEEE802154_MFR_SIZE, 8); + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAG1_HEAD_SIZE, 0, + frag_len + skb_network_header_len(skb)); + if (rc) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, frag_tag); + goto err; + } - while (payload_length - offset > 0) { - int len = frag_plen; + frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; + frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; + frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); - head[4] = dgram_offset >> 3; + do { + dgram_offset += frag_len; + skb_offset += frag_len; + skb_unprocessed -= frag_len; + frag_len = min(frag_cap, skb_unprocessed); - if (payload_length - offset < len) - len = payload_length - offset; + frag_hdr[4] = dgram_offset >> 3; - err = lowpan_fragment_xmit(skb, head, header_length, len, - offset, LOWPAN_DISPATCH_FRAGN); - if (err) { + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAGN_HEAD_SIZE, skb_offset, + frag_len); + if (rc) { pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", - __func__, tag, offset); - goto exit; + __func__, frag_tag, skb_offset); + goto err; } + } while (skb_unprocessed > frag_cap); - offset += len; - dgram_offset += len; - } + consume_skb(skb); + return NET_XMIT_SUCCESS; -exit: - return err; +err: + kfree_skb(skb); + return rc; } static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) { - int err = -1; + struct ieee802154_hdr wpan_hdr; + int max_single; pr_debug("package xmit\n"); - skb->dev = lowpan_dev_info(dev)->real_dev; - if (skb->dev == NULL) { - pr_debug("ERROR: no real wpan device found\n"); - goto error; + if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; } - /* Send directly if less than the MTU minus the 2 checksum bytes. */ - if (skb->len <= IEEE802154_MTU - IEEE802154_MFR_SIZE) { - err = dev_queue_xmit(skb); - goto out; - } + max_single = ieee802154_max_payload(&wpan_hdr); - pr_debug("frame is too big, fragmentation is needed\n"); - err = lowpan_skb_fragmentation(skb, dev); -error: - dev_kfree_skb(skb); -out: - if (err) - pr_debug("ERROR: xmit failed\n"); + if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { + skb->dev = lowpan_dev_info(dev)->real_dev; + return dev_queue_xmit(skb); + } else { + netdev_tx_t rc; - return (err < 0) ? NET_XMIT_DROP : err; + pr_debug("frame is too big, fragmentation is needed\n"); + rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); + + return rc < 0 ? NET_XMIT_DROP : rc; + } } static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_phy(real_dev); } static __le16 lowpan_get_pan_id(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); } static __le16 lowpan_get_short_addr(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); } static u8 lowpan_get_dsn(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); } @@ -445,7 +458,7 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) } static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) + struct packet_type *pt, struct net_device *orig_dev) { struct ieee802154_hdr hdr; int ret; diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 8af1330b3137..c0d4154d144f 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -12,13 +12,6 @@ config IEEE802154 config IEEE802154_6LOWPAN tristate "6lowpan support over IEEE 802.15.4" - depends on IEEE802154 && IPV6 - select 6LOWPAN_IPHC + depends on IEEE802154 && 6LOWPAN ---help--- IPv6 compression over IEEE 802.15.4. - -config 6LOWPAN_IPHC - tristate - ---help--- - 6lowpan compression code which is shared between IEEE 802.15.4 and Bluetooth - stacks. diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index bf1b51497a41..3914b1ed4274 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,8 +1,7 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o -obj-$(CONFIG_6LOWPAN_IPHC) += 6lowpan_iphc.o +obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o -6lowpan-y := 6lowpan_rtnl.o reassembly.o +ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o \ header_ops.o af_802154-y := af_ieee802154.o raw.o dgram.o diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 351d9a94ec2f..29e0de63001b 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -40,9 +40,7 @@ #include "af802154.h" -/* - * Utility function for families - */ +/* Utility function for families */ struct net_device* ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) { @@ -87,8 +85,8 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) rtnl_unlock(); break; default: - pr_warning("Unsupported ieee802154 address type: %d\n", - addr->mode); + pr_warn("Unsupported ieee802154 address type: %d\n", + addr->mode); break; } @@ -106,7 +104,7 @@ static int ieee802154_sock_release(struct socket *sock) return 0; } static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -114,7 +112,7 @@ static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, - int addr_len) + int addr_len) { struct sock *sk = sock->sk; @@ -125,7 +123,7 @@ static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, } static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) + int addr_len, int flags) { struct sock *sk = sock->sk; @@ -139,7 +137,7 @@ static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, } static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, - unsigned int cmd) + unsigned int cmd) { struct ifreq ifr; int ret = -ENOIOCTLCMD; @@ -167,7 +165,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, } static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) + unsigned long arg) { struct sock *sk = sock->sk; @@ -238,8 +236,7 @@ static const struct proto_ops ieee802154_dgram_ops = { }; -/* - * Create a socket. Initialise the socket, blank the addresses +/* Create a socket. Initialise the socket, blank the addresses * set the state. */ static int ieee802154_create(struct net *net, struct socket *sock, @@ -301,13 +298,14 @@ static const struct net_proto_family ieee802154_family_ops = { }; static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) + struct packet_type *pt, struct net_device *orig_dev) { if (!netif_running(dev)) goto drop; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); #ifdef DEBUG - print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); + print_hex_dump_bytes("ieee802154_rcv ", + DUMP_PREFIX_NONE, skb->data, skb->len); #endif if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 786437bc0c08..ef2ad8aaef13 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -21,6 +21,7 @@ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> */ +#include <linux/capability.h> #include <linux/net.h> #include <linux/module.h> #include <linux/if_arp.h> @@ -45,7 +46,12 @@ struct dgram_sock { struct ieee802154_addr dst_addr; unsigned int bound:1; + unsigned int connected:1; unsigned int want_ack:1; + unsigned int secen:1; + unsigned int secen_override:1; + unsigned int seclevel:3; + unsigned int seclevel_override:1; }; static inline struct dgram_sock *dgram_sk(const struct sock *sk) @@ -73,10 +79,7 @@ static int dgram_init(struct sock *sk) { struct dgram_sock *ro = dgram_sk(sk); - ro->dst_addr.mode = IEEE802154_ADDR_LONG; - ro->dst_addr.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); ro->want_ack = 1; - memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN); return 0; } @@ -146,8 +149,7 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { - /* - * We will only return the amount + /* We will only return the amount * of this packet since that is all * that will be read. */ @@ -158,12 +160,13 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) } } + return -ENOIOCTLCMD; } /* FIXME: autobind */ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, - int len) + int len) { struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; struct dgram_sock *ro = dgram_sk(sk); @@ -183,6 +186,7 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, } ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); + ro->connected = 1; out: release_sock(sk); @@ -194,22 +198,21 @@ static int dgram_disconnect(struct sock *sk, int flags) struct dgram_sock *ro = dgram_sk(sk); lock_sock(sk); - - ro->dst_addr.mode = IEEE802154_ADDR_LONG; - memset(&ro->dst_addr.extended_addr, 0xff, IEEE802154_ADDR_LEN); - + ro->connected = 0; release_sock(sk); return 0; } static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) + struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; struct sk_buff *skb; + struct ieee802154_mac_cb *cb; struct dgram_sock *ro = dgram_sk(sk); + struct ieee802154_addr dst_addr; int hlen, tlen; int err; @@ -218,6 +221,11 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; } + if (!ro->connected && !msg->msg_name) + return -EDESTADDRREQ; + else if (ro->connected && msg->msg_name) + return -EISCONN; + if (!ro->bound) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); else @@ -240,8 +248,8 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, - &err); + msg->msg_flags & MSG_DONTWAIT, + &err); if (!skb) goto out_dev; @@ -249,18 +257,29 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, skb_reset_network_header(skb); - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; - if (ro->want_ack) - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; + cb = mac_cb_init(skb); + cb->type = IEEE802154_FC_TYPE_DATA; + cb->ackreq = ro->want_ack; + + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, + daddr, msg->msg_name); - mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); - err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr, - ro->bound ? &ro->src_addr : NULL, size); + ieee802154_addr_from_sa(&dst_addr, &daddr->addr); + } else { + dst_addr = ro->dst_addr; + } + + cb->secen = ro->secen; + cb->secen_override = ro->secen_override; + cb->seclevel = ro->seclevel; + cb->seclevel_override = ro->seclevel_override; + + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, + ro->bound ? &ro->src_addr : NULL, size); if (err < 0) goto out_skb; - skb_reset_mac_header(skb); - err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto out_skb; @@ -286,8 +305,8 @@ out: } static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, int flags, - int *addr_len) + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -380,6 +399,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) dgram_sk(sk))) { if (prev) { struct sk_buff *clone; + clone = skb_clone(skb, GFP_ATOMIC); if (clone) dgram_rcv_skb(prev, clone); @@ -389,9 +409,9 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) } } - if (prev) + if (prev) { dgram_rcv_skb(prev, skb); - else { + } else { kfree_skb(skb); ret = NET_RX_DROP; } @@ -401,7 +421,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) } static int dgram_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen) { struct dgram_sock *ro = dgram_sk(sk); @@ -419,6 +439,20 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname, case WPAN_WANTACK: val = ro->want_ack; break; + case WPAN_SECURITY: + if (!ro->secen_override) + val = WPAN_SECURITY_DEFAULT; + else if (ro->secen) + val = WPAN_SECURITY_ON; + else + val = WPAN_SECURITY_OFF; + break; + case WPAN_SECURITY_LEVEL: + if (!ro->seclevel_override) + val = WPAN_SECURITY_LEVEL_DEFAULT; + else + val = ro->seclevel; + break; default: return -ENOPROTOOPT; } @@ -431,9 +465,10 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname, } static int dgram_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + char __user *optval, unsigned int optlen) { struct dgram_sock *ro = dgram_sk(sk); + struct net *net = sock_net(sk); int val; int err = 0; @@ -449,6 +484,47 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname, case WPAN_WANTACK: ro->want_ack = !!val; break; + case WPAN_SECURITY: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + switch (val) { + case WPAN_SECURITY_DEFAULT: + ro->secen_override = 0; + break; + case WPAN_SECURITY_ON: + ro->secen_override = 1; + ro->secen = 1; + break; + case WPAN_SECURITY_OFF: + ro->secen_override = 1; + ro->secen = 0; + break; + default: + err = -EINVAL; + break; + } + break; + case WPAN_SECURITY_LEVEL: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + if (val < WPAN_SECURITY_LEVEL_DEFAULT || + val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { + err = -EINVAL; + } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { + ro->seclevel_override = 0; + } else { + ro->seclevel_override = 1; + ro->seclevel = val; + } + break; default: err = -ENOPROTOOPT; break; diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c index bed42a48408c..c09294e39ca6 100644 --- a/net/ieee802154/header_ops.c +++ b/net/ieee802154/header_ops.c @@ -195,15 +195,16 @@ ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr) return pos; } +static int ieee802154_sechdr_lengths[4] = { + [IEEE802154_SCF_KEY_IMPLICIT] = 5, + [IEEE802154_SCF_KEY_INDEX] = 6, + [IEEE802154_SCF_KEY_SHORT_INDEX] = 10, + [IEEE802154_SCF_KEY_HW_INDEX] = 14, +}; + static int ieee802154_hdr_sechdr_len(u8 sc) { - switch (IEEE802154_SCF_KEY_ID_MODE(sc)) { - case IEEE802154_SCF_KEY_IMPLICIT: return 5; - case IEEE802154_SCF_KEY_INDEX: return 6; - case IEEE802154_SCF_KEY_SHORT_INDEX: return 10; - case IEEE802154_SCF_KEY_HW_INDEX: return 14; - default: return -EINVAL; - } + return ieee802154_sechdr_lengths[IEEE802154_SCF_KEY_ID_MODE(sc)]; } static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr) @@ -285,3 +286,40 @@ ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr) return pos; } EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs); + +int +ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr) +{ + const u8 *buf = skb_mac_header(skb); + int pos; + + pos = ieee802154_hdr_peek_addrs(skb, hdr); + if (pos < 0) + return -EINVAL; + + if (hdr->fc.security_enabled) { + u8 key_id_mode = IEEE802154_SCF_KEY_ID_MODE(*(buf + pos)); + int want = pos + ieee802154_sechdr_lengths[key_id_mode]; + + if (buf + want > skb_tail_pointer(skb)) + return -EINVAL; + + pos += ieee802154_hdr_get_sechdr(buf + pos, &hdr->sec); + } + + return pos; +} +EXPORT_SYMBOL_GPL(ieee802154_hdr_peek); + +int ieee802154_max_payload(const struct ieee802154_hdr *hdr) +{ + int hlen = ieee802154_hdr_minlen(hdr); + + if (hdr->fc.security_enabled) { + hlen += ieee802154_sechdr_lengths[hdr->sec.key_id_mode] - 1; + hlen += ieee802154_sechdr_authtag_len(&hdr->sec); + } + + return IEEE802154_MTU - hlen - IEEE802154_MFR_SIZE; +} +EXPORT_SYMBOL_GPL(ieee802154_max_payload); diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index 6693a5cf01ce..5d352f86979e 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -43,7 +43,7 @@ struct genl_info; struct sk_buff *ieee802154_nl_create(int flags, u8 req); int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group); struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, - int flags, u8 req); + int flags, u8 req); int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info); extern struct genl_family nl802154_family; @@ -68,4 +68,23 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info); int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb); int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_keys(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_devs(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, + struct netlink_callback *cb); +int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info); +int ieee802154_llsec_dump_seclevels(struct sk_buff *skb, + struct netlink_callback *cb); + #endif diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 04b20589d97a..9222966f5e6d 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -52,7 +52,7 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req) spin_lock_irqsave(&ieee802154_seq_lock, f); hdr = genlmsg_put(msg, 0, ieee802154_seq_num++, - &nl802154_family, flags, req); + &nl802154_family, flags, req); spin_unlock_irqrestore(&ieee802154_seq_lock, f); if (!hdr) { nlmsg_free(msg); @@ -86,7 +86,7 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, return NULL; hdr = genlmsg_put_reply(msg, info, - &nl802154_family, flags, req); + &nl802154_family, flags, req); if (!hdr) { nlmsg_free(msg); return NULL; @@ -124,6 +124,26 @@ static const struct genl_ops ieee8021154_ops[] = { IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, ieee802154_dump_iface), IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams), + IEEE802154_OP(IEEE802154_LLSEC_GETPARAMS, ieee802154_llsec_getparams), + IEEE802154_OP(IEEE802154_LLSEC_SETPARAMS, ieee802154_llsec_setparams), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_KEY, NULL, + ieee802154_llsec_dump_keys), + IEEE802154_OP(IEEE802154_LLSEC_ADD_KEY, ieee802154_llsec_add_key), + IEEE802154_OP(IEEE802154_LLSEC_DEL_KEY, ieee802154_llsec_del_key), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEV, NULL, + ieee802154_llsec_dump_devs), + IEEE802154_OP(IEEE802154_LLSEC_ADD_DEV, ieee802154_llsec_add_dev), + IEEE802154_OP(IEEE802154_LLSEC_DEL_DEV, ieee802154_llsec_del_dev), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEVKEY, NULL, + ieee802154_llsec_dump_devkeys), + IEEE802154_OP(IEEE802154_LLSEC_ADD_DEVKEY, ieee802154_llsec_add_devkey), + IEEE802154_OP(IEEE802154_LLSEC_DEL_DEVKEY, ieee802154_llsec_del_devkey), + IEEE802154_DUMP(IEEE802154_LLSEC_LIST_SECLEVEL, NULL, + ieee802154_llsec_dump_seclevels), + IEEE802154_OP(IEEE802154_LLSEC_ADD_SECLEVEL, + ieee802154_llsec_add_seclevel), + IEEE802154_OP(IEEE802154_LLSEC_DEL_SECLEVEL, + ieee802154_llsec_del_seclevel), }; static const struct genl_multicast_group ieee802154_mcgrps[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 5d285498c0f6..c6bfe22bfa5e 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -60,7 +60,8 @@ static __le16 nla_get_shortaddr(const struct nlattr *nla) } int ieee802154_nl_assoc_indic(struct net_device *dev, - struct ieee802154_addr *addr, u8 cap) + struct ieee802154_addr *addr, + u8 cap) { struct sk_buff *msg; @@ -93,7 +94,7 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_assoc_indic); int ieee802154_nl_assoc_confirm(struct net_device *dev, __le16 short_addr, - u8 status) + u8 status) { struct sk_buff *msg; @@ -119,7 +120,8 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_assoc_confirm); int ieee802154_nl_disassoc_indic(struct net_device *dev, - struct ieee802154_addr *addr, u8 reason) + struct ieee802154_addr *addr, + u8 reason) { struct sk_buff *msg; @@ -205,8 +207,9 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_beacon_indic); int ieee802154_nl_scan_confirm(struct net_device *dev, - u8 status, u8 scan_type, u32 unscanned, u8 page, - u8 *edl/* , struct list_head *pan_desc_list */) + u8 status, u8 scan_type, + u32 unscanned, u8 page, + u8 *edl/* , struct list_head *pan_desc_list */) { struct sk_buff *msg; @@ -260,7 +263,7 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_start_confirm); static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, - u32 seq, int flags, struct net_device *dev) + u32 seq, int flags, struct net_device *dev) { void *hdr; struct wpan_phy *phy; @@ -270,7 +273,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, pr_debug("%s\n", __func__); hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, - IEEE802154_LIST_IFACE); + IEEE802154_LIST_IFACE); if (!hdr) goto out; @@ -330,14 +333,16 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { char name[IFNAMSIZ + 1]; + nla_strlcpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME], - sizeof(name)); + sizeof(name)); dev = dev_get_by_name(&init_net, name); - } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) + } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) { dev = dev_get_by_index(&init_net, nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); - else + } else { return NULL; + } if (!dev) return NULL; @@ -435,7 +440,7 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info) int ret = -EOPNOTSUPP; if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && - !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || + !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || !info->attrs[IEEE802154_ATTR_REASON]) return -EINVAL; @@ -464,8 +469,7 @@ out: return ret; } -/* - * PANid, channel, beacon_order = 15, superframe_order = 15, +/* PANid, channel, beacon_order = 15, superframe_order = 15, * PAN_coordinator, battery_life_extension = 0, * coord_realignment = 0, security_enable = 0 */ @@ -559,8 +563,8 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) page = 0; - ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, - duration); + ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, + page, duration); out: dev_put(dev); @@ -570,7 +574,8 @@ out: int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info) { /* Request for interface name, index, type, IEEE address, - PAN Id, short address */ + * PAN Id, short address + */ struct sk_buff *msg; struct net_device *dev = NULL; int rc = -ENOBUFS; @@ -586,7 +591,7 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info) goto out_dev; rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq, - 0, dev); + 0, dev); if (rc < 0) goto out_free; @@ -598,7 +603,6 @@ out_free: out_dev: dev_put(dev); return rc; - } int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb) @@ -616,7 +620,8 @@ int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb) goto cont; if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) + cb->nlh->nlmsg_seq, + NLM_F_MULTI, dev) < 0) break; cont: idx++; @@ -715,3 +720,813 @@ out: dev_put(dev); return rc; } + + + +static int +ieee802154_llsec_parse_key_id(struct genl_info *info, + struct ieee802154_llsec_key_id *desc) +{ + memset(desc, 0, sizeof(*desc)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) + return -EINVAL; + + desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]); + + if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (!info->attrs[IEEE802154_ATTR_PAN_ID] && + !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] || + info->attrs[IEEE802154_ATTR_HW_ADDR])) + return -EINVAL; + + desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); + + if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) { + desc->device_addr.mode = IEEE802154_ADDR_SHORT; + desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); + } else { + desc->device_addr.mode = IEEE802154_ADDR_LONG; + desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + } + } + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]) + return -EINVAL; + + if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]) + return -EINVAL; + + if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]) + return -EINVAL; + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT) + desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]); + + switch (desc->mode) { + case IEEE802154_SCF_KEY_SHORT_INDEX: + { + u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]); + + desc->short_source = cpu_to_le32(source); + break; + } + case IEEE802154_SCF_KEY_HW_INDEX: + desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]); + break; + } + + return 0; +} + +static int +ieee802154_llsec_fill_key_id(struct sk_buff *msg, + const struct ieee802154_llsec_key_id *desc) +{ + if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode)) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, + desc->device_addr.pan_id)) + return -EMSGSIZE; + + if (desc->device_addr.mode == IEEE802154_ADDR_SHORT && + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, + desc->device_addr.short_addr)) + return -EMSGSIZE; + + if (desc->device_addr.mode == IEEE802154_ADDR_LONG && + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, + desc->device_addr.extended_addr)) + return -EMSGSIZE; + } + + if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id)) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX && + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT, + le32_to_cpu(desc->short_source))) + return -EMSGSIZE; + + if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX && + nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED, + desc->extended_source)) + return -EMSGSIZE; + + return 0; +} + +int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + struct net_device *dev = NULL; + int rc = -ENOBUFS; + struct ieee802154_mlme_ops *ops; + void *hdr; + struct ieee802154_llsec_params params; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + ops = ieee802154_mlme_ops(dev); + if (!ops->llsec) { + rc = -EOPNOTSUPP; + goto out_dev; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0, + IEEE802154_LLSEC_GETPARAMS); + if (!hdr) + goto out_free; + + rc = ops->llsec->get_params(dev, ¶ms); + if (rc < 0) + goto out_free; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + be32_to_cpu(params.frame_counter)) || + ieee802154_llsec_fill_key_id(msg, ¶ms.out_key)) + goto out_free; + + dev_put(dev); + + return ieee802154_nl_reply(msg, info); +out_free: + nlmsg_free(msg); +out_dev: + dev_put(dev); + return rc; +} + +int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev = NULL; + int rc = -EINVAL; + struct ieee802154_mlme_ops *ops; + struct ieee802154_llsec_params params; + int changed = 0; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (!info->attrs[IEEE802154_ATTR_LLSEC_ENABLED] && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] && + !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) + goto out; + + ops = ieee802154_mlme_ops(dev); + if (!ops->llsec) { + rc = -EOPNOTSUPP; + goto out; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] && + nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7) + goto out; + + if (info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]) { + params.enabled = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]); + changed |= IEEE802154_LLSEC_PARAM_ENABLED; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) { + if (ieee802154_llsec_parse_key_id(info, ¶ms.out_key)) + goto out; + + changed |= IEEE802154_LLSEC_PARAM_OUT_KEY; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) { + params.out_level = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]); + changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL; + } + + if (info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) { + u32 fc = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + + params.frame_counter = cpu_to_be32(fc); + changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER; + } + + rc = ops->llsec->set_params(dev, ¶ms, changed); + + dev_put(dev); + + return rc; +out: + dev_put(dev); + return rc; +} + + + +struct llsec_dump_data { + struct sk_buff *skb; + int s_idx, s_idx2; + int portid; + int nlmsg_seq; + struct net_device *dev; + struct ieee802154_mlme_ops *ops; + struct ieee802154_llsec_table *table; +}; + +static int +ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb, + int (*step)(struct llsec_dump_data *)) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + struct llsec_dump_data data; + int idx = 0; + int first_dev = cb->args[0]; + int rc; + + for_each_netdev(net, dev) { + if (idx < first_dev || dev->type != ARPHRD_IEEE802154) + goto skip; + + data.ops = ieee802154_mlme_ops(dev); + if (!data.ops->llsec) + goto skip; + + data.skb = skb; + data.s_idx = cb->args[1]; + data.s_idx2 = cb->args[2]; + data.dev = dev; + data.portid = NETLINK_CB(cb->skb).portid; + data.nlmsg_seq = cb->nlh->nlmsg_seq; + + data.ops->llsec->lock_table(dev); + data.ops->llsec->get_table(data.dev, &data.table); + rc = step(&data); + data.ops->llsec->unlock_table(dev); + + if (rc < 0) + break; + +skip: + idx++; + } + cb->args[0] = idx; + + return skb->len; +} + +static int +ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info, + int (*fn)(struct net_device*, struct genl_info*)) +{ + struct net_device *dev = NULL; + int rc = -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (!ieee802154_mlme_ops(dev)->llsec) + rc = -EOPNOTSUPP; + else + rc = fn(dev, info); + + dev_put(dev); + return rc; +} + + + +static int +ieee802154_llsec_parse_key(struct genl_info *info, + struct ieee802154_llsec_key *key) +{ + u8 frames; + u32 commands[256 / 32]; + + memset(key, 0, sizeof(*key)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] || + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES]) + return -EINVAL; + + frames = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]); + if ((frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)) && + !info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) + return -EINVAL; + + if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) { + nla_memcpy(commands, + info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS], + 256 / 8); + + if (commands[0] || commands[1] || commands[2] || commands[3] || + commands[4] || commands[5] || commands[6] || + commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1)) + return -EINVAL; + + key->cmd_frame_ids = commands[7]; + } + + key->frame_types = frames; + + nla_memcpy(key->key, info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES], + IEEE802154_LLSEC_KEY_SIZE); + + return 0; +} + +static int llsec_add_key(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_key key; + struct ieee802154_llsec_key_id id; + + if (ieee802154_llsec_parse_key(info, &key) || + ieee802154_llsec_parse_key_id(info, &id)) + return -EINVAL; + + return ops->llsec->add_key(dev, &id, &key); +} + +int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_key); +} + +static int llsec_remove_key(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_key_id id; + + if (ieee802154_llsec_parse_key_id(info, &id)) + return -EINVAL; + + return ops->llsec->del_key(dev, &id); +} + +int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_remove_key); +} + +static int +ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_key_entry *key, + const struct net_device *dev) +{ + void *hdr; + u32 commands[256 / 32]; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_KEY); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + ieee802154_llsec_fill_key_id(msg, &key->id) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES, + key->key->frame_types)) + goto nla_put_failure; + + if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) { + memset(commands, 0, sizeof(commands)); + commands[7] = key->key->cmd_frame_ids; + if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS, + sizeof(commands), commands)) + goto nla_put_failure; + } + + if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES, + IEEE802154_LLSEC_KEY_SIZE, key->key->key)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_keys(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_key_entry *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->keys, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_key(data->skb, data->portid, + data->nlmsg_seq, pos, data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys); +} + + + +static int +llsec_parse_dev(struct genl_info *info, + struct ieee802154_llsec_device *dev) +{ + memset(dev, 0, sizeof(*dev)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || + !info->attrs[IEEE802154_ATTR_HW_ADDR] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] || + (!!info->attrs[IEEE802154_ATTR_PAN_ID] != + !!info->attrs[IEEE802154_ATTR_SHORT_ADDR])) + return -EINVAL; + + if (info->attrs[IEEE802154_ATTR_PAN_ID]) { + dev->pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]); + dev->short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]); + } else { + dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF); + } + + dev->hwaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + dev->frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + dev->seclevel_exempt = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); + dev->key_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]); + + if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX) + return -EINVAL; + + return 0; +} + +static int llsec_add_dev(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device desc; + + if (llsec_parse_dev(info, &desc)) + return -EINVAL; + + return ops->llsec->add_dev(dev, &desc); +} + +int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_dev); +} + +static int llsec_del_dev(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_HW_ADDR]) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + + return ops->llsec->del_dev(dev, devaddr); +} + +int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_dev); +} + +static int +ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_device *desc, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_DEV); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) || + nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, + desc->short_addr) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + desc->frame_counter) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + desc->seclevel_exempt) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_devs(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_device *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->devices, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_dev(data->skb, data->portid, + data->nlmsg_seq, pos, data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs); +} + + + +static int llsec_add_devkey(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device_key key; + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] || + !info->attrs[IEEE802154_ATTR_HW_ADDR] || + ieee802154_llsec_parse_key_id(info, &key.key_id)) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + key.frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]); + + return ops->llsec->add_devkey(dev, devaddr, &key); +} + +int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey); +} + +static int llsec_del_devkey(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_device_key key; + __le64 devaddr; + + if (!info->attrs[IEEE802154_ATTR_HW_ADDR] || + ieee802154_llsec_parse_key_id(info, &key.key_id)) + return -EINVAL; + + devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]); + + return ops->llsec->del_devkey(dev, devaddr, &key); +} + +int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey); +} + +static int +ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq, + __le64 devaddr, + const struct ieee802154_llsec_device_key *devkey, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_DEVKEY); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) || + nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER, + devkey->frame_counter) || + ieee802154_llsec_fill_key_id(msg, &devkey->key_id)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_devkeys(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_device *dpos; + struct ieee802154_llsec_device_key *kpos; + int rc = 0, idx = 0, idx2; + + list_for_each_entry(dpos, &data->table->devices, list) { + if (idx++ < data->s_idx) + continue; + + idx2 = 0; + + list_for_each_entry(kpos, &dpos->keys, list) { + if (idx2++ < data->s_idx2) + continue; + + if (ieee802154_nl_fill_devkey(data->skb, data->portid, + data->nlmsg_seq, + dpos->hwaddr, kpos, + data->dev)) { + return rc = -EMSGSIZE; + } + + data->s_idx2++; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_devkeys(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys); +} + + + +static int +llsec_parse_seclevel(struct genl_info *info, + struct ieee802154_llsec_seclevel *sl) +{ + memset(sl, 0, sizeof(*sl)); + + if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE] || + !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS] || + !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]) + return -EINVAL; + + sl->frame_type = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]); + if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) { + if (!info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]) + return -EINVAL; + + sl->cmd_frame_id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]); + } + + sl->sec_levels = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]); + sl->device_override = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); + + return 0; +} + +static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info, &sl)) + return -EINVAL; + + return ops->llsec->add_seclevel(dev, &sl); +} + +int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info) +{ + if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) != + (NLM_F_CREATE | NLM_F_EXCL)) + return -EINVAL; + + return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel); +} + +static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info) +{ + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info, &sl)) + return -EINVAL; + + return ops->llsec->del_seclevel(dev, &sl); +} + +int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info) +{ + return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel); +} + +static int +ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq, + const struct ieee802154_llsec_seclevel *sl, + const struct net_device *dev) +{ + void *hdr; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI, + IEEE802154_LLSEC_LIST_SECLEVEL); + if (!hdr) + goto out; + + if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) || + nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels) || + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, + sl->device_override)) + goto nla_put_failure; + + if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD && + nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID, + sl->cmd_frame_id)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +static int llsec_iter_seclevels(struct llsec_dump_data *data) +{ + struct ieee802154_llsec_seclevel *pos; + int rc = 0, idx = 0; + + list_for_each_entry(pos, &data->table->security_levels, list) { + if (idx++ < data->s_idx) + continue; + + if (ieee802154_nl_fill_seclevel(data->skb, data->portid, + data->nlmsg_seq, pos, + data->dev)) { + rc = -EMSGSIZE; + break; + } + + data->s_idx++; + } + + return rc; +} + +int ieee802154_llsec_dump_seclevels(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels); +} diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 89b265aea151..972baf83411a 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -36,7 +36,7 @@ #include "ieee802154.h" static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, - u32 seq, int flags, struct wpan_phy *phy) + u32 seq, int flags, struct wpan_phy *phy) { void *hdr; int i, pages = 0; @@ -48,7 +48,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, return -EMSGSIZE; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, - IEEE802154_LIST_PHY); + IEEE802154_LIST_PHY); if (!hdr) goto out; @@ -80,7 +80,8 @@ out: int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info) { /* Request for interface name, index, type, IEEE address, - PAN Id, short address */ + * PAN Id, short address + */ struct sk_buff *msg; struct wpan_phy *phy; const char *name; @@ -105,7 +106,7 @@ int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info) goto out_dev; rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq, - 0, phy); + 0, phy); if (rc < 0) goto out_free; @@ -117,7 +118,6 @@ out_free: out_dev: wpan_phy_put(phy); return rc; - } struct dump_phy_data { @@ -137,10 +137,10 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) return 0; rc = ieee802154_nl_fill_phy(data->skb, - NETLINK_CB(data->cb->skb).portid, - data->cb->nlh->nlmsg_seq, - NLM_F_MULTI, - phy); + NETLINK_CB(data->cb->skb).portid, + data->cb->nlh->nlmsg_seq, + NLM_F_MULTI, + phy); if (rc < 0) { data->idx--; @@ -238,10 +238,9 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) addr.sa_family = ARPHRD_IEEE802154; nla_memcpy(&addr.sa_data, info->attrs[IEEE802154_ATTR_HW_ADDR], - IEEE802154_ADDR_LEN); + IEEE802154_ADDR_LEN); - /* - * strangely enough, some callbacks (inetdev_event) from + /* strangely enough, some callbacks (inetdev_event) from * dev_set_mac_address require RTNL_LOCK */ rtnl_lock(); diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index fd7be5e45cef..3a703ab88348 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -62,5 +62,21 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, }, [IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, }, + + [IEEE802154_ATTR_LLSEC_ENABLED] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_SECLEVEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_MODE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT] = { .type = NLA_U32, }, + [IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED] = { .type = NLA_HW_ADDR, }, + [IEEE802154_ATTR_LLSEC_KEY_ID] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_FRAME_COUNTER] = { .type = NLA_U32 }, + [IEEE802154_ATTR_LLSEC_KEY_BYTES] = { .len = 16, }, + [IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS] = { .len = 258 / 8 }, + [IEEE802154_ATTR_LLSEC_FRAME_TYPE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_CMD_FRAME_ID] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_SECLEVELS] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, }, }; diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 74d54fae33d7..9d1f64806f02 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -96,7 +96,7 @@ out: } static int raw_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) + int addr_len) { return -ENOTSUPP; } @@ -106,8 +106,8 @@ static int raw_disconnect(struct sock *sk, int flags) return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size) +static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -145,7 +145,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, &err); + msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto out_dev; @@ -235,7 +235,6 @@ void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) bh_lock_sock(sk); if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) { - struct sk_buff *clone; clone = skb_clone(skb, GFP_ATOMIC); @@ -248,13 +247,13 @@ void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) } static int raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen) { return -EOPNOTSUPP; } static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + char __user *optval, unsigned int optlen) { return -EOPNOTSUPP; } @@ -274,4 +273,3 @@ struct proto ieee802154_raw_prot = { .getsockopt = raw_getsockopt, .setsockopt = raw_setsockopt, }; - diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index ef2d54372b13..ffec6ce51005 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -30,13 +30,15 @@ #include "reassembly.h" +static const char lowpan_frags_cache_name[] = "lowpan-frags"; + struct lowpan_frag_info { __be16 d_tag; u16 d_size; u8 d_offset; }; -struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) +static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) { return (struct lowpan_frag_info *)skb->cb; } @@ -50,29 +52,25 @@ static unsigned int lowpan_hash_frag(__be16 tag, u16 d_size, const struct ieee802154_addr *saddr, const struct ieee802154_addr *daddr) { - u32 c; - net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd)); - c = jhash_3words(ieee802154_addr_hash(saddr), - ieee802154_addr_hash(daddr), - (__force u32)(tag + (d_size << 16)), - lowpan_frags.rnd); - - return c & (INETFRAGS_HASHSZ - 1); + return jhash_3words(ieee802154_addr_hash(saddr), + ieee802154_addr_hash(daddr), + (__force u32)(tag + (d_size << 16)), + lowpan_frags.rnd); } -static unsigned int lowpan_hashfn(struct inet_frag_queue *q) +static unsigned int lowpan_hashfn(const struct inet_frag_queue *q) { - struct lowpan_frag_queue *fq; + const struct lowpan_frag_queue *fq; fq = container_of(q, struct lowpan_frag_queue, q); return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr); } -static bool lowpan_frag_match(struct inet_frag_queue *q, void *a) +static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a) { - struct lowpan_frag_queue *fq; - struct lowpan_create_arg *arg = a; + const struct lowpan_frag_queue *fq; + const struct lowpan_create_arg *arg = a; fq = container_of(q, struct lowpan_frag_queue, q); return fq->tag == arg->tag && fq->d_size == arg->d_size && @@ -80,10 +78,10 @@ static bool lowpan_frag_match(struct inet_frag_queue *q, void *a) ieee802154_addr_equal(&fq->daddr, arg->dst); } -static void lowpan_frag_init(struct inet_frag_queue *q, void *a) +static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { + const struct lowpan_create_arg *arg = a; struct lowpan_frag_queue *fq; - struct lowpan_create_arg *arg = a; fq = container_of(q, struct lowpan_frag_queue, q); @@ -103,7 +101,7 @@ static void lowpan_frag_expire(unsigned long data) spin_lock(&fq->q.lock); - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto out; inet_frag_kill(&fq->q, &lowpan_frags); @@ -120,16 +118,17 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, struct inet_frag_queue *q; struct lowpan_create_arg arg; unsigned int hash; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); arg.tag = frag_info->d_tag; arg.d_size = frag_info->d_size; arg.src = src; arg.dst = dst; - read_lock(&lowpan_frags.lock); hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); - q = inet_frag_find(&net->ieee802154_lowpan.frags, + q = inet_frag_find(&ieee802154_lowpan->frags, &lowpan_frags, &arg, hash); if (IS_ERR_OR_NULL(q)) { inet_frag_maybe_warn_overflow(q, pr_fmt()); @@ -145,7 +144,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, struct net_device *dev; int end, offset; - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto err; offset = lowpan_cb(skb)->d_offset << 3; @@ -157,14 +156,14 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) goto err; - fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & INET_FRAG_LAST_IN) + if (fq->q.flags & INET_FRAG_LAST_IN) goto err; fq->q.len = end; } @@ -204,13 +203,13 @@ found: if (frag_type == LOWPAN_DISPATCH_FRAG1) { /* Calculate uncomp. 6lowpan header to estimate full size */ fq->q.meat += lowpan_uncompress_size(skb, NULL); - fq->q.last_in |= INET_FRAG_FIRST_IN; + fq->q.flags |= INET_FRAG_FIRST_IN; } else { fq->q.meat += skb->len; } add_frag_mem_limit(&fq->q, skb->truesize); - if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { int res; unsigned long orefdst = skb->_skb_refdst; @@ -221,7 +220,6 @@ found: return res; } - inet_frag_lru_move(&fq->q); return -1; err: kfree_skb(skb); @@ -357,6 +355,8 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) struct net *net = dev_net(skb->dev); struct lowpan_frag_info *frag_info = lowpan_cb(skb); struct ieee802154_addr source, dest; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); int err; source = mac_cb(skb)->source; @@ -366,14 +366,13 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) if (err < 0) goto err; - if (frag_info->d_size > net->ieee802154_lowpan.max_dsize) + if (frag_info->d_size > ieee802154_lowpan->max_dsize) goto err; - inet_frag_evictor(&net->ieee802154_lowpan.frags, &lowpan_frags, false); - fq = fq_find(net, frag_info, &source, &dest); if (fq != NULL) { int ret; + spin_lock(&fq->q.lock); ret = lowpan_frag_queue(fq, skb, frag_type); spin_unlock(&fq->q.lock); @@ -389,20 +388,25 @@ err: EXPORT_SYMBOL(lowpan_frag_rcv); #ifdef CONFIG_SYSCTL +static int zero; + static struct ctl_table lowpan_frags_ns_ctl_table[] = { { .procname = "6lowpanfrag_high_thresh", .data = &init_net.ieee802154_lowpan.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh }, { .procname = "6lowpanfrag_low_thresh", .data = &init_net.ieee802154_lowpan.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh }, { .procname = "6lowpanfrag_time", @@ -421,10 +425,12 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int lowpan_frags_secret_interval_unused; static struct ctl_table lowpan_frags_ctl_table[] = { { .procname = "6lowpanfrag_secret_interval", - .data = &lowpan_frags.secret_interval, + .data = &lowpan_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -436,6 +442,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); table = lowpan_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { @@ -444,10 +452,13 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &net->ieee802154_lowpan.frags.high_thresh; - table[1].data = &net->ieee802154_lowpan.frags.low_thresh; - table[2].data = &net->ieee802154_lowpan.frags.timeout; - table[3].data = &net->ieee802154_lowpan.max_dsize; + table[0].data = &ieee802154_lowpan->frags.high_thresh; + table[0].extra1 = &ieee802154_lowpan->frags.low_thresh; + table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh; + table[1].data = &ieee802154_lowpan->frags.low_thresh; + table[1].extra2 = &ieee802154_lowpan->frags.high_thresh; + table[2].data = &ieee802154_lowpan->frags.timeout; + table[3].data = &ieee802154_lowpan->max_dsize; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -458,7 +469,7 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->ieee802154_lowpan.sysctl.frags_hdr = hdr; + ieee802154_lowpan->sysctl.frags_hdr = hdr; return 0; err_reg: @@ -471,9 +482,11 @@ err_alloc: static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); - table = net->ieee802154_lowpan.sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->ieee802154_lowpan.sysctl.frags_hdr); + table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } @@ -514,20 +527,26 @@ static inline void lowpan_frags_sysctl_unregister(void) static int __net_init lowpan_frags_init_net(struct net *net) { - net->ieee802154_lowpan.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - net->ieee802154_lowpan.frags.low_thresh = IPV6_FRAG_LOW_THRESH; - net->ieee802154_lowpan.frags.timeout = IPV6_FRAG_TIMEOUT; - net->ieee802154_lowpan.max_dsize = 0xFFFF; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + + ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; + ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + ieee802154_lowpan->max_dsize = 0xFFFF; - inet_frags_init_net(&net->ieee802154_lowpan.frags); + inet_frags_init_net(&ieee802154_lowpan->frags); return lowpan_frags_ns_sysctl_register(net); } static void __net_exit lowpan_frags_exit_net(struct net *net) { + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&net->ieee802154_lowpan.frags, &lowpan_frags); + inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); } static struct pernet_operations lowpan_frags_ops = { @@ -554,8 +573,10 @@ int __init lowpan_net_frag_init(void) lowpan_frags.qsize = sizeof(struct frag_queue); lowpan_frags.match = lowpan_frag_match; lowpan_frags.frag_expire = lowpan_frag_expire; - lowpan_frags.secret_interval = 10 * 60 * HZ; - inet_frags_init(&lowpan_frags); + lowpan_frags.frags_cache_name = lowpan_frags_cache_name; + ret = inet_frags_init(&lowpan_frags); + if (ret) + goto err_pernet; return ret; err_pernet: diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 8d6f6704da84..4955e0fe5883 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -48,7 +48,8 @@ MASTER_SHOW(transmit_power, "%d +- 1 dB"); MASTER_SHOW(cca_mode, "%d"); static ssize_t channels_supported_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); int ret; @@ -57,7 +58,7 @@ static ssize_t channels_supported_show(struct device *dev, mutex_lock(&phy->pib_lock); for (i = 0; i < 32; i++) { ret = snprintf(buf + len, PAGE_SIZE - len, - "%#09x\n", phy->channels_supported[i]); + "%#09x\n", phy->channels_supported[i]); if (ret < 0) break; len += ret; @@ -80,6 +81,7 @@ ATTRIBUTE_GROUPS(pmib); static void wpan_phy_release(struct device *d) { struct wpan_phy *phy = container_of(d, struct wpan_phy, dev); + kfree(phy); } @@ -121,11 +123,12 @@ static int wpan_phy_iter(struct device *dev, void *_data) { struct wpan_phy_iter_data *wpid = _data; struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); + return wpid->fn(phy, wpid->data); } int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data), - void *data) + void *data) { struct wpan_phy_iter_data wpid = { .fn = fn, @@ -197,6 +200,7 @@ EXPORT_SYMBOL(wpan_phy_free); static int __init wpan_phy_class_init(void) { int rc; + rc = class_register(&wpan_phy_class); if (rc) goto err; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 05c57f0fcabe..dbc10d84161f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -307,6 +307,10 @@ config NET_IPVTI the notion of a secure tunnel for IPSEC and then use routing protocol on top. +config NET_UDP_TUNNEL + tristate + default n + config INET_AH tristate "IP: AH transformation" select XFRM_ALGO diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f032688d20d3..8ee1cd4053ee 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_NET_IPIP) += ipip.o gre-y := gre_demux.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o +obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8c54870db792..d156b3c5f363 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -254,7 +254,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, struct inet_sock *inet; struct proto *answer_prot; unsigned char answer_flags; - char answer_no_check; int try_loading_module = 0; int err; @@ -312,7 +311,6 @@ lookup_protocol: sock->ops = answer->ops; answer_prot = answer->prot; - answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); @@ -324,7 +322,6 @@ lookup_protocol: goto out; err = 0; - sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; @@ -1002,7 +999,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }, @@ -1012,7 +1008,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }, @@ -1021,7 +1016,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_ICMP, .prot = &ping_prot, .ops = &inet_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }, @@ -1030,7 +1024,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } }; @@ -1261,10 +1254,12 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | 0))) goto out; @@ -1434,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff) int proto = iph->protocol; int err = -ENOSYS; + if (skb->encapsulation) + skb_set_inner_network_header(skb, nhoff); + csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; @@ -1476,22 +1474,20 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); -unsigned long snmp_fold_field(void __percpu *mib[], int offt) +unsigned long snmp_fold_field(void __percpu *mib, int offt) { unsigned long res = 0; - int i, j; + int i; - for_each_possible_cpu(i) { - for (j = 0; j < SNMP_ARRAY_SZ; j++) - res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt); - } + for_each_possible_cpu(i) + res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt); return res; } EXPORT_SYMBOL_GPL(snmp_fold_field); #if BITS_PER_LONG==32 -u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) +u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset) { u64 res = 0; int cpu; @@ -1502,7 +1498,7 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) u64 v; unsigned int start; - bhptr = per_cpu_ptr(mib[0], cpu); + bhptr = per_cpu_ptr(mib, cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { start = u64_stats_fetch_begin_irq(syncp); @@ -1516,25 +1512,6 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) EXPORT_SYMBOL_GPL(snmp_fold_field64); #endif -int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) -{ - BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize, align); - if (!ptr[0]) - return -ENOMEM; - -#if SNMP_ARRAY_SZ == 2 - ptr[1] = __alloc_percpu(mibsize, align); - if (!ptr[1]) { - free_percpu(ptr[0]); - ptr[0] = NULL; - return -ENOMEM; - } -#endif - return 0; -} -EXPORT_SYMBOL_GPL(snmp_mib_init); - #ifdef CONFIG_IP_MULTICAST static const struct net_protocol igmp_protocol = { .handler = igmp_rcv, @@ -1570,40 +1547,30 @@ static __net_init int ipv4_mib_init_net(struct net *net) { int i; - if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, - sizeof(struct tcp_mib), - __alignof__(struct tcp_mib)) < 0) + net->mib.tcp_statistics = alloc_percpu(struct tcp_mib); + if (!net->mib.tcp_statistics) goto err_tcp_mib; - if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + net->mib.ip_statistics = alloc_percpu(struct ipstats_mib); + if (!net->mib.ip_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet_stats; - af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i); - u64_stats_init(&af_inet_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i); + af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i); u64_stats_init(&af_inet_stats->syncp); -#endif } - if (snmp_mib_init((void __percpu **)net->mib.net_statistics, - sizeof(struct linux_mib), - __alignof__(struct linux_mib)) < 0) + net->mib.net_statistics = alloc_percpu(struct linux_mib); + if (!net->mib.net_statistics) goto err_net_mib; - if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udp_statistics = alloc_percpu(struct udp_mib); + if (!net->mib.udp_statistics) goto err_udp_mib; - if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udplite_statistics = alloc_percpu(struct udp_mib); + if (!net->mib.udplite_statistics) goto err_udplite_mib; - if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, - sizeof(struct icmp_mib), - __alignof__(struct icmp_mib)) < 0) + net->mib.icmp_statistics = alloc_percpu(struct icmp_mib); + if (!net->mib.icmp_statistics) goto err_icmp_mib; net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), GFP_KERNEL); @@ -1614,17 +1581,17 @@ static __net_init int ipv4_mib_init_net(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void __percpu **)net->mib.icmp_statistics); + free_percpu(net->mib.icmp_statistics); err_icmp_mib: - snmp_mib_free((void __percpu **)net->mib.udplite_statistics); + free_percpu(net->mib.udplite_statistics); err_udplite_mib: - snmp_mib_free((void __percpu **)net->mib.udp_statistics); + free_percpu(net->mib.udp_statistics); err_udp_mib: - snmp_mib_free((void __percpu **)net->mib.net_statistics); + free_percpu(net->mib.net_statistics); err_net_mib: - snmp_mib_free((void __percpu **)net->mib.ip_statistics); + free_percpu(net->mib.ip_statistics); err_ip_mib: - snmp_mib_free((void __percpu **)net->mib.tcp_statistics); + free_percpu(net->mib.tcp_statistics); err_tcp_mib: return -ENOMEM; } @@ -1632,12 +1599,12 @@ err_tcp_mib: static __net_exit void ipv4_mib_exit_net(struct net *net) { kfree(net->mib.icmpmsg_statistics); - snmp_mib_free((void __percpu **)net->mib.icmp_statistics); - snmp_mib_free((void __percpu **)net->mib.udplite_statistics); - snmp_mib_free((void __percpu **)net->mib.udp_statistics); - snmp_mib_free((void __percpu **)net->mib.net_statistics); - snmp_mib_free((void __percpu **)net->mib.ip_statistics); - snmp_mib_free((void __percpu **)net->mib.tcp_statistics); + free_percpu(net->mib.icmp_statistics); + free_percpu(net->mib.udplite_statistics); + free_percpu(net->mib.udp_statistics); + free_percpu(net->mib.net_statistics); + free_percpu(net->mib.ip_statistics); + free_percpu(net->mib.tcp_statistics); } static __net_initdata struct pernet_operations ipv4_mib_ops = { @@ -1650,6 +1617,39 @@ static int __init init_ipv4_mibs(void) return register_pernet_subsys(&ipv4_mib_ops); } +static __net_init int inet_init_net(struct net *net) +{ + /* + * Set defaults for local port range + */ + seqlock_init(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = 32768; + net->ipv4.ip_local_ports.range[1] = 61000; + + seqlock_init(&net->ipv4.ping_group_range.lock); + /* + * Sane defaults - nobody may create ping sockets. + * Boot scripts should set this to distro-specific group. + */ + net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1); + net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0); + return 0; +} + +static __net_exit void inet_exit_net(struct net *net) +{ +} + +static __net_initdata struct pernet_operations af_inet_ops = { + .init = inet_init_net, + .exit = inet_exit_net, +}; + +static int __init init_inet_pernet_ops(void) +{ + return register_pernet_subsys(&af_inet_ops); +} + static int ipv4_proc_init(void); /* @@ -1703,13 +1703,9 @@ static int __init inet_init(void) BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); - sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); - if (!sysctl_local_reserved_ports) - goto out; - rc = proto_register(&tcp_prot, 1); if (rc) - goto out_free_reserved_ports; + goto out; rc = proto_register(&udp_prot, 1); if (rc) @@ -1794,6 +1790,9 @@ static int __init inet_init(void) if (ip_mr_init()) pr_crit("%s: Cannot init ipv4 mroute\n", __func__); #endif + + if (init_inet_pernet_ops()) + pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); /* * Initialise per-cpu ipv4 mibs */ @@ -1816,8 +1815,6 @@ out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: proto_unregister(&tcp_prot); -out_free_reserved_ports: - kfree(sysctl_local_reserved_ports); goto out; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 69e77c8ff285..05b708bbdb0d 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -890,8 +890,8 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, } for (;;) { - host_spot = netlbl_secattr_catmap_walk(secattr->attr.mls.cat, - host_spot + 1); + host_spot = netlbl_catmap_walk(secattr->attr.mls.cat, + host_spot + 1); if (host_spot < 0) break; @@ -973,7 +973,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, return -EPERM; break; } - ret_val = netlbl_secattr_catmap_setbit(secattr->attr.mls.cat, + ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat, host_spot, GFP_ATOMIC); if (ret_val != 0) @@ -1039,8 +1039,7 @@ static int cipso_v4_map_cat_enum_hton(const struct cipso_v4_doi *doi_def, u32 cat_iter = 0; for (;;) { - cat = netlbl_secattr_catmap_walk(secattr->attr.mls.cat, - cat + 1); + cat = netlbl_catmap_walk(secattr->attr.mls.cat, cat + 1); if (cat < 0) break; if ((cat_iter + 2) > net_cat_len) @@ -1075,9 +1074,9 @@ static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def, u32 iter; for (iter = 0; iter < net_cat_len; iter += 2) { - ret_val = netlbl_secattr_catmap_setbit(secattr->attr.mls.cat, - get_unaligned_be16(&net_cat[iter]), - GFP_ATOMIC); + ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat, + get_unaligned_be16(&net_cat[iter]), + GFP_ATOMIC); if (ret_val != 0) return ret_val; } @@ -1155,8 +1154,7 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def, return -ENOSPC; for (;;) { - iter = netlbl_secattr_catmap_walk(secattr->attr.mls.cat, - iter + 1); + iter = netlbl_catmap_walk(secattr->attr.mls.cat, iter + 1); if (iter < 0) break; cat_size += (iter == 0 ? 0 : sizeof(u16)); @@ -1164,8 +1162,7 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def, return -ENOSPC; array[array_cnt++] = iter; - iter = netlbl_secattr_catmap_walk_rng(secattr->attr.mls.cat, - iter); + iter = netlbl_catmap_walkrng(secattr->attr.mls.cat, iter); if (iter < 0) return -EFAULT; cat_size += sizeof(u16); @@ -1217,10 +1214,10 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def, else cat_low = 0; - ret_val = netlbl_secattr_catmap_setrng(secattr->attr.mls.cat, - cat_low, - cat_high, - GFP_ATOMIC); + ret_val = netlbl_catmap_setrng(&secattr->attr.mls.cat, + cat_low, + cat_high, + GFP_ATOMIC); if (ret_val != 0) return ret_val; } @@ -1335,16 +1332,12 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); - if (secattr->attr.mls.cat == NULL) - return -ENOMEM; - ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, &tag[4], tag_len - 4, secattr); if (ret_val != 0) { - netlbl_secattr_catmap_free(secattr->attr.mls.cat); + netlbl_catmap_free(secattr->attr.mls.cat); return ret_val; } @@ -1430,16 +1423,12 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); - if (secattr->attr.mls.cat == NULL) - return -ENOMEM; - ret_val = cipso_v4_map_cat_enum_ntoh(doi_def, &tag[4], tag_len - 4, secattr); if (ret_val != 0) { - netlbl_secattr_catmap_free(secattr->attr.mls.cat); + netlbl_catmap_free(secattr->attr.mls.cat); return ret_val; } @@ -1524,16 +1513,12 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); - if (secattr->attr.mls.cat == NULL) - return -ENOMEM; - ret_val = cipso_v4_map_cat_rng_ntoh(doi_def, &tag[4], tag_len - 4, secattr); if (ret_val != 0) { - netlbl_secattr_catmap_free(secattr->attr.mls.cat); + netlbl_catmap_free(secattr->attr.mls.cat); return ret_val; } diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 8b5134c582f1..90c0e8386116 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; + inet_set_txhash(sk); inet->inet_id = jiffies; sk_dst_set(sk, &rt->dst); @@ -86,18 +87,26 @@ out: } EXPORT_SYMBOL(ip4_datagram_connect); +/* Because UDP xmit path can manipulate sk_dst_cache without holding + * socket lock, we need to use sk_dst_set() here, + * even if we own the socket lock. + */ void ip4_datagram_release_cb(struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ip_options_rcu *inet_opt; __be32 daddr = inet->inet_daddr; + struct dst_entry *dst; struct flowi4 fl4; struct rtable *rt; - if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0)) - return; - rcu_read_lock(); + + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) { + rcu_read_unlock(); + return; + } inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; @@ -105,8 +114,10 @@ void ip4_datagram_release_cb(struct sock *sk) inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); - if (!IS_ERR(rt)) - __sk_dst_set(sk, &rt->dst); + + dst = !IS_ERR(rt) ? &rt->dst : NULL; + sk_dst_set(sk, dst); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bdbf68bb2e2d..214882e7d6de 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -106,7 +106,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; -static DEFINE_SPINLOCK(inet_addr_hash_lock); static u32 inet_addr_hash(struct net *net, __be32 addr) { @@ -119,16 +118,14 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) { u32 hash = inet_addr_hash(net, ifa->ifa_local); - spin_lock(&inet_addr_hash_lock); + ASSERT_RTNL(); hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); - spin_unlock(&inet_addr_hash_lock); } static void inet_hash_remove(struct in_ifaddr *ifa) { - spin_lock(&inet_addr_hash_lock); + ASSERT_RTNL(); hlist_del_init_rcu(&ifa->hash); - spin_unlock(&inet_addr_hash_lock); } /** @@ -183,11 +180,12 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); #ifdef CONFIG_SYSCTL -static void devinet_sysctl_register(struct in_device *idev); +static int devinet_sysctl_register(struct in_device *idev); static void devinet_sysctl_unregister(struct in_device *idev); #else -static void devinet_sysctl_register(struct in_device *idev) +static int devinet_sysctl_register(struct in_device *idev) { + return 0; } static void devinet_sysctl_unregister(struct in_device *idev) { @@ -235,6 +233,7 @@ EXPORT_SYMBOL(in_dev_finish_destroy); static struct in_device *inetdev_init(struct net_device *dev) { struct in_device *in_dev; + int err = -ENOMEM; ASSERT_RTNL(); @@ -255,7 +254,13 @@ static struct in_device *inetdev_init(struct net_device *dev) /* Account for reference dev->ip_ptr (below) */ in_dev_hold(in_dev); - devinet_sysctl_register(in_dev); + err = devinet_sysctl_register(in_dev); + if (err) { + in_dev->dead = 1; + in_dev_put(in_dev); + in_dev = NULL; + goto out; + } ip_mc_init_dev(in_dev); if (dev->flags & IFF_UP) ip_mc_up(in_dev); @@ -263,7 +268,7 @@ static struct in_device *inetdev_init(struct net_device *dev) /* we can receive as soon as ip_ptr is set -- do this last */ rcu_assign_pointer(dev->ip_ptr, in_dev); out: - return in_dev; + return in_dev ?: ERR_PTR(err); out_kfree: kfree(in_dev); in_dev = NULL; @@ -830,7 +835,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifa_existing = find_matching_ifa(ifa); if (!ifa_existing) { /* It would be best to check for !NLM_F_CREATE here but - * userspace alreay relies on not having to provide this. + * userspace already relies on not having to provide this. */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); @@ -1350,8 +1355,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); - if (!in_dev) - return notifier_from_errno(-ENOMEM); + if (IS_ERR(in_dev)) + return notifier_from_errno(PTR_ERR(in_dev)); if (dev->flags & IFF_LOOPBACK) { IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); @@ -2185,11 +2190,21 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) kfree(t); } -static void devinet_sysctl_register(struct in_device *idev) +static int devinet_sysctl_register(struct in_device *idev) { - neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); - __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, + int err; + + if (!sysctl_dev_name_is_allowed(idev->dev->name)) + return -EINVAL; + + err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); + if (err) + return err; + err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, &idev->cnf); + if (err) + neigh_sysctl_unregister(idev->arp_parms); + return err; } static void devinet_sysctl_unregister(struct in_device *idev) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8a043f03c88e..b10cd43a4722 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -821,13 +821,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; + fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); if (!fi->fib_metrics) goto failure; } else fi->fib_metrics = (u32 *) dst_default_metrics; - fib_info_cnt++; fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 250be7421ab3..0485bf7f8f03 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -68,6 +68,7 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, skb_push(skb, hdr_len); + skb_reset_transport_header(skb); greh = (struct gre_base_hdr *)skb->data; greh->flags = tnl_flags_to_gre_flags(tpi->flags); greh->protocol = tpi->proto; @@ -84,7 +85,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, ptr--; } if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) { *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, skb->len, 0)); @@ -93,28 +95,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, } EXPORT_SYMBOL_GPL(gre_build_header); -static __sum16 check_checksum(struct sk_buff *skb) -{ - __sum16 csum = 0; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - - if (!csum) - break; - /* Fall through. */ - - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - break; - } - - return csum; -} - static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -141,7 +121,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, options = (__be32 *)(greh + 1); if (greh->flags & GRE_CSUM) { - if (check_checksum(skb)) { + if (skb_checksum_simple_validate(skb)) { *csum_err = true; return -EINVAL; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index f1d32280cb54..6556263c8fa5 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP))) goto out; @@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; csum = !!(greh->flags & GRE_CSUM); + if (csum) + skb->encap_hdr_csum = 1; if (unlikely(!pskb_may_pull(skb, ghl))) goto out; @@ -71,7 +74,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) { + if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); goto out; } @@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } } - greh = (struct gre_base_hdr *)(skb->data); + skb_reset_transport_header(skb); + + greh = (struct gre_base_hdr *) + skb_transport_header(skb); pcsum = (__be32 *)(greh + 1); *pcsum = 0; - *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + *(__sum16 *)pcsum = gso_make_checksum(skb, 0); } __skb_push(skb, tnl_hlen - ghl); @@ -125,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb) csum_partial(skb->data, skb_gro_offset(skb), 0)); sum = csum_fold(NAPI_GRO_CB(skb)->csum); if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { - if (unlikely(!sum)) + if (unlikely(!sum) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); - } else + } else { skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + } return sum; } @@ -255,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff) int err = -ENOENT; __be16 type; + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type = SKB_GSO_GRE; + type = greh->protocol; if (greh->flags & GRE_KEY) grehlen += GRE_HEADER_SECTION; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0134663fdbce..ea7d4afe8205 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct sock *sk; struct inet_sock *inet; __be32 daddr, saddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); ipc.opt = NULL; @@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = saddr; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->daddr = (param->replyopts.opt.opt.srr ? param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; @@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param->skb = skb_in; icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; ipc.ttl = 0; ipc.tos = -1; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -656,16 +663,16 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) /* Checkin full IP header plus 8 bytes of protocol to * avoid additional coding at protocol handlers. */ - if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) + if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { + ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); return; + } raw_icmp_error(skb, protocol, info); - rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, info); - rcu_read_unlock(); } static bool icmp_tag_validation(int proto) @@ -732,8 +739,6 @@ static void icmp_unreach(struct sk_buff *skb) /* fall through */ case 0: info = ntohs(icmph->un.frag.mtu); - if (!info) - goto out; } break; case ICMP_SR_FAILED: @@ -908,16 +913,8 @@ int icmp_rcv(struct sk_buff *skb) ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_fold(skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - if (__skb_checksum_complete(skb)) - goto csum_error; - } + if (skb_checksum_simple_validate(skb)) + goto csum_error; if (!pskb_pull(skb, sizeof(*icmph))) goto error; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 97e4d1655d26..f10eab462282 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; @@ -988,16 +988,8 @@ int igmp_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct igmphdr))) goto drop; - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_fold(skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - if (__skb_checksum_complete(skb)) - goto drop; - } + if (skb_checksum_simple_validate(skb)) + goto drop; ih = igmp_hdr(skb); switch (ih->type) { @@ -1329,7 +1321,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); + setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; #endif @@ -1952,6 +1944,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); + if (!in_dev) { + ret = -ENODEV; + goto out; + } ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = rtnl_dereference(*imlp)) != NULL; @@ -1969,16 +1965,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) *imlp = iml->next_rcu; - if (in_dev) - ip_mc_dec_group(in_dev, group); + ip_mc_dec_group(in_dev, group); rtnl_unlock(); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); kfree_rcu(iml, rcu); return 0; } - if (!in_dev) - ret = -ENODEV; +out: rtnl_unlock(); return ret; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0d1e2cb877ec..14d02ea905b6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -29,19 +29,16 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif -unsigned long *sysctl_local_reserved_ports; -EXPORT_SYMBOL(sysctl_local_reserved_ports); - void inet_get_local_port_range(struct net *net, int *low, int *high) { unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); - *low = net->ipv4.sysctl_local_ports.range[0]; - *high = net->ipv4.sysctl_local_ports.range[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + *low = net->ipv4.ip_local_ports.range[0]; + *high = net->ipv4.ip_local_ports.range[1]; + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); @@ -113,7 +110,7 @@ again: smallest_size = -1; do { - if (inet_is_reserved_local_port(rover)) + if (inet_is_local_reserved_port(net, rover)) goto next_nolock; head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; @@ -408,7 +405,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); int flags = inet_sk_flowi_flags(sk); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, flags, @@ -445,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rcu_read_lock(); opt = rcu_dereference(newinet->inet_opt); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, @@ -680,6 +677,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); newsk->sk_write_space = sk_stream_write_space; + newsk->sk_mark = inet_rsk(req)->ir_mark; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 3b01959bf4bb..9eb89f3f0ee4 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -25,6 +25,12 @@ #include <net/inet_frag.h> #include <net/inet_ecn.h> +#define INETFRAGS_EVICT_BUCKETS 128 +#define INETFRAGS_EVICT_MAX 512 + +/* don't rebuild inetfrag table with new secret more often than this */ +#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ) + /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field @@ -46,24 +52,39 @@ const u8 ip_frag_ecn_table[16] = { }; EXPORT_SYMBOL(ip_frag_ecn_table); -static void inet_frag_secret_rebuild(unsigned long dummy) +static unsigned int +inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) +{ + return f->hashfn(q) & (INETFRAGS_HASHSZ - 1); +} + +static bool inet_frag_may_rebuild(struct inet_frags *f) +{ + return time_after(jiffies, + f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL); +} + +static void inet_frag_secret_rebuild(struct inet_frags *f) { - struct inet_frags *f = (struct inet_frags *)dummy; - unsigned long now = jiffies; int i; - /* Per bucket lock NOT needed here, due to write lock protection */ - write_lock(&f->lock); + write_seqlock_bh(&f->rnd_seqlock); + + if (!inet_frag_may_rebuild(f)) + goto out; get_random_bytes(&f->rnd, sizeof(u32)); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_bucket *hb; struct inet_frag_queue *q; struct hlist_node *n; hb = &f->hash[i]; + spin_lock(&hb->chain_lock); + hlist_for_each_entry_safe(q, n, &hb->chain, list) { - unsigned int hval = f->hashfn(q); + unsigned int hval = inet_frag_hashfn(f, q); if (hval != i) { struct inet_frag_bucket *hb_dest; @@ -72,76 +93,200 @@ static void inet_frag_secret_rebuild(unsigned long dummy) /* Relink to new hash chain. */ hb_dest = &f->hash[hval]; + + /* This is the only place where we take + * another chain_lock while already holding + * one. As this will not run concurrently, + * we cannot deadlock on hb_dest lock below, if its + * already locked it will be released soon since + * other caller cannot be waiting for hb lock + * that we've taken above. + */ + spin_lock_nested(&hb_dest->chain_lock, + SINGLE_DEPTH_NESTING); hlist_add_head(&q->list, &hb_dest->chain); + spin_unlock(&hb_dest->chain_lock); } } + spin_unlock(&hb->chain_lock); + } + + f->rebuild = false; + f->last_rebuild_jiffies = jiffies; +out: + write_sequnlock_bh(&f->rnd_seqlock); +} + +static bool inet_fragq_should_evict(const struct inet_frag_queue *q) +{ + return q->net->low_thresh == 0 || + frag_mem_limit(q->net) >= q->net->low_thresh; +} + +static unsigned int +inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) +{ + struct inet_frag_queue *fq; + struct hlist_node *n; + unsigned int evicted = 0; + HLIST_HEAD(expired); + +evict_again: + spin_lock(&hb->chain_lock); + + hlist_for_each_entry_safe(fq, n, &hb->chain, list) { + if (!inet_fragq_should_evict(fq)) + continue; + + if (!del_timer(&fq->timer)) { + /* q expiring right now thus increment its refcount so + * it won't be freed under us and wait until the timer + * has finished executing then destroy it + */ + atomic_inc(&fq->refcnt); + spin_unlock(&hb->chain_lock); + del_timer_sync(&fq->timer); + WARN_ON(atomic_read(&fq->refcnt) != 1); + inet_frag_put(fq, f); + goto evict_again; + } + + fq->flags |= INET_FRAG_EVICTED; + hlist_del(&fq->list); + hlist_add_head(&fq->list, &expired); + ++evicted; } - write_unlock(&f->lock); - mod_timer(&f->secret_timer, now + f->secret_interval); + spin_unlock(&hb->chain_lock); + + hlist_for_each_entry_safe(fq, n, &expired, list) + f->frag_expire((unsigned long) fq); + + return evicted; } -void inet_frags_init(struct inet_frags *f) +static void inet_frag_worker(struct work_struct *work) +{ + unsigned int budget = INETFRAGS_EVICT_BUCKETS; + unsigned int i, evicted = 0; + struct inet_frags *f; + + f = container_of(work, struct inet_frags, frags_work); + + BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); + + local_bh_disable(); + + for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { + evicted += inet_evict_bucket(f, &f->hash[i]); + i = (i + 1) & (INETFRAGS_HASHSZ - 1); + if (evicted > INETFRAGS_EVICT_MAX) + break; + } + + f->next_bucket = i; + + local_bh_enable(); + + if (f->rebuild && inet_frag_may_rebuild(f)) + inet_frag_secret_rebuild(f); +} + +static void inet_frag_schedule_worker(struct inet_frags *f) +{ + if (unlikely(!work_pending(&f->frags_work))) + schedule_work(&f->frags_work); +} + +int inet_frags_init(struct inet_frags *f) { int i; + INIT_WORK(&f->frags_work, inet_frag_worker); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_bucket *hb = &f->hash[i]; spin_lock_init(&hb->chain_lock); INIT_HLIST_HEAD(&hb->chain); } - rwlock_init(&f->lock); - setup_timer(&f->secret_timer, inet_frag_secret_rebuild, - (unsigned long)f); - f->secret_timer.expires = jiffies + f->secret_interval; - add_timer(&f->secret_timer); + seqlock_init(&f->rnd_seqlock); + f->last_rebuild_jiffies = 0; + f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, + NULL); + if (!f->frags_cachep) + return -ENOMEM; + + return 0; } EXPORT_SYMBOL(inet_frags_init); void inet_frags_init_net(struct netns_frags *nf) { - nf->nqueues = 0; init_frag_mem_limit(nf); - INIT_LIST_HEAD(&nf->lru_list); - spin_lock_init(&nf->lru_lock); } EXPORT_SYMBOL(inet_frags_init_net); void inet_frags_fini(struct inet_frags *f) { - del_timer(&f->secret_timer); + cancel_work_sync(&f->frags_work); + kmem_cache_destroy(f->frags_cachep); } EXPORT_SYMBOL(inet_frags_fini); void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) { - nf->low_thresh = 0; + unsigned int seq; + int i; + nf->low_thresh = 0; local_bh_disable(); - inet_frag_evictor(nf, f, true); + +evict_again: + seq = read_seqbegin(&f->rnd_seqlock); + + for (i = 0; i < INETFRAGS_HASHSZ ; i++) + inet_evict_bucket(f, &f->hash[i]); + + if (read_seqretry(&f->rnd_seqlock, seq)) + goto evict_again; + local_bh_enable(); percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); -static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) +static struct inet_frag_bucket * +get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f) +__acquires(hb->chain_lock) { struct inet_frag_bucket *hb; - unsigned int hash; + unsigned int seq, hash; + + restart: + seq = read_seqbegin(&f->rnd_seqlock); - read_lock(&f->lock); - hash = f->hashfn(fq); + hash = inet_frag_hashfn(f, fq); hb = &f->hash[hash]; spin_lock(&hb->chain_lock); + if (read_seqretry(&f->rnd_seqlock, seq)) { + spin_unlock(&hb->chain_lock); + goto restart; + } + + return hb; +} + +static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) +{ + struct inet_frag_bucket *hb; + + hb = get_frag_bucket_locked(fq, f); hlist_del(&fq->list); spin_unlock(&hb->chain_lock); - - read_unlock(&f->lock); - inet_frag_lru_del(fq); } void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) @@ -149,30 +294,29 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) if (del_timer(&fq->timer)) atomic_dec(&fq->refcnt); - if (!(fq->last_in & INET_FRAG_COMPLETE)) { + if (!(fq->flags & INET_FRAG_COMPLETE)) { fq_unlink(fq, f); atomic_dec(&fq->refcnt); - fq->last_in |= INET_FRAG_COMPLETE; + fq->flags |= INET_FRAG_COMPLETE; } } EXPORT_SYMBOL(inet_frag_kill); static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb) + struct sk_buff *skb) { if (f->skb_free) f->skb_free(skb); kfree_skb(skb); } -void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, - int *work) +void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) { struct sk_buff *fp; struct netns_frags *nf; unsigned int sum, sum_truesize = 0; - WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); + WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ @@ -186,87 +330,32 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, fp = xp; } sum = sum_truesize + f->qsize; - if (work) - *work -= sum; sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); - kfree(q); - + kmem_cache_free(f->frags_cachep, q); } EXPORT_SYMBOL(inet_frag_destroy); -int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) -{ - struct inet_frag_queue *q; - int work, evicted = 0; - - if (!force) { - if (frag_mem_limit(nf) <= nf->high_thresh) - return 0; - } - - work = frag_mem_limit(nf) - nf->low_thresh; - while (work > 0 || force) { - spin_lock(&nf->lru_lock); - - if (list_empty(&nf->lru_list)) { - spin_unlock(&nf->lru_lock); - break; - } - - q = list_first_entry(&nf->lru_list, - struct inet_frag_queue, lru_list); - atomic_inc(&q->refcnt); - /* Remove q from list to avoid several CPUs grabbing it */ - list_del_init(&q->lru_list); - - spin_unlock(&nf->lru_lock); - - spin_lock(&q->lock); - if (!(q->last_in & INET_FRAG_COMPLETE)) - inet_frag_kill(q, f); - spin_unlock(&q->lock); - - if (atomic_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f, &work); - evicted++; - } - - return evicted; -} -EXPORT_SYMBOL(inet_frag_evictor); - static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, - struct inet_frag_queue *qp_in, struct inet_frags *f, - void *arg) + struct inet_frag_queue *qp_in, + struct inet_frags *f, + void *arg) { - struct inet_frag_bucket *hb; + struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f); struct inet_frag_queue *qp; - unsigned int hash; - - read_lock(&f->lock); /* Protects against hash rebuild */ - /* - * While we stayed w/o the lock other CPU could update - * the rnd seed, so we need to re-calculate the hash - * chain. Fortunatelly the qp_in can be used to get one. - */ - hash = f->hashfn(qp_in); - hb = &f->hash[hash]; - spin_lock(&hb->chain_lock); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because - * such entry could be created on other cpu, while we - * released the hash bucket lock. + * such entry could have been created on other cpu before + * we acquired hash bucket lock. */ hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); spin_unlock(&hb->chain_lock); - read_unlock(&f->lock); - qp_in->last_in |= INET_FRAG_COMPLETE; + qp_in->flags |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); return qp; } @@ -278,19 +367,24 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); - inet_frag_lru_add(nf, qp); + spin_unlock(&hb->chain_lock); - read_unlock(&f->lock); return qp; } static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, - struct inet_frags *f, void *arg) + struct inet_frags *f, + void *arg) { struct inet_frag_queue *q; - q = kzalloc(f->qsize, GFP_ATOMIC); + if (frag_mem_limit(nf) > nf->high_thresh) { + inet_frag_schedule_worker(f); + return NULL; + } + + q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); if (q == NULL) return NULL; @@ -301,13 +395,13 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); - INIT_LIST_HEAD(&q->lru_list); return q; } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, void *arg) + struct inet_frags *f, + void *arg) { struct inet_frag_queue *q; @@ -319,13 +413,17 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, } struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, - struct inet_frags *f, void *key, unsigned int hash) - __releases(&f->lock) + struct inet_frags *f, void *key, + unsigned int hash) { struct inet_frag_bucket *hb; struct inet_frag_queue *q; int depth = 0; + if (frag_mem_limit(nf) > nf->low_thresh) + inet_frag_schedule_worker(f); + + hash &= (INETFRAGS_HASHSZ - 1); hb = &f->hash[hash]; spin_lock(&hb->chain_lock); @@ -333,18 +431,22 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); spin_unlock(&hb->chain_lock); - read_unlock(&f->lock); return q; } depth++; } spin_unlock(&hb->chain_lock); - read_unlock(&f->lock); if (depth <= INETFRAGS_MAXDEPTH) return inet_frag_create(nf, f, key); - else - return ERR_PTR(-ENOBUFS); + + if (inet_frag_may_rebuild(f)) { + if (!f->rebuild) + f->rebuild = true; + inet_frag_schedule_worker(f); + } + + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8b9cf279450d..43116e8c8e13 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -274,7 +274,7 @@ struct sock *__inet_lookup_established(struct net *net, const __be32 daddr, const u16 hnum, const int dif) { - INET_ADDR_COOKIE(acookie, saddr, daddr) + INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; const struct hlist_nulls_node *node; @@ -327,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __be32 daddr = inet->inet_rcv_saddr; __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) + INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, @@ -500,7 +500,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; - if (inet_is_reserved_local_port(port)) + if (inet_is_local_reserved_port(net, port)) continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 48f424465112..bd5f5928167d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -26,20 +26,7 @@ * Theory of operations. * We keep one entry for each peer IP address. The nodes contains long-living * information about the peer which doesn't depend on routes. - * At this moment this information consists only of ID field for the next - * outgoing IP packet. This field is incremented with each packet as encoded - * in inet_getid() function (include/net/inetpeer.h). - * At the moment of writing this notes identifier of IP packets is generated - * to be unpredictable using this code only for packets subjected - * (actually or potentially) to defragmentation. I.e. DF packets less than - * PMTU in size when local fragmentation is disabled use a constant ID and do - * not use this code (see ip_select_ident() in include/net/ip.h). * - * Route cache entries hold references to our nodes. - * New cache entries get references via lookup by destination IP address in - * the avl tree. The reference is grabbed only when it's needed i.e. only - * when we try to output IP packet which needs an unpredictable ID (see - * __ip_select_ident() in net/ipv4/route.c). * Nodes are removed only when reference counter goes to 0. * When it's happened the node may be removed when a sufficient amount of * time has been passed since its last use. The less-recently-used entry can @@ -62,7 +49,6 @@ * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable - * ip_id_count: atomic value (no lock needed) */ static struct kmem_cache *peer_cachep __read_mostly; @@ -120,7 +106,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min static void inetpeer_gc_worker(struct work_struct *work) { struct inet_peer *p, *n, *c; - LIST_HEAD(list); + struct list_head list; spin_lock_bh(&gc_lock); list_replace_init(&gc_list, &list); @@ -497,10 +483,6 @@ relookup: p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, - (daddr->family == AF_INET) ? - secure_ip_id(daddr->addr.a4) : - secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first @@ -522,7 +504,7 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { p->dtime = (__u32)jiffies; - smp_mb__before_atomic_dec(); + smp_mb__before_atomic(); atomic_dec(&p->refcnt); } EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index be8abe73bb9f..3a83ce5efa80 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,12 +42,12 @@ static bool ip_may_fragment(const struct sk_buff *skb) { return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || - !skb->local_df; + skb->ignore_df; } static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) @@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } -static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) -{ - unsigned int mtu; - - if (skb->local_df || !skb_is_gso(skb)) - return false; - - mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true); - - /* if seglen > mtu, do software segmentation for IP fragmentation on - * output. DF bit cannot be set since ip_forward would have sent - * icmp error. - */ - return skb_gso_network_seglen(skb) > mtu; -} - -/* called if GSO skb needs to be fragmented on forward */ -static int ip_forward_finish_gso(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - netdev_features_t features; - struct sk_buff *segs; - int ret = 0; - - features = netif_skb_dev_features(skb, dst->dev); - segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); - if (IS_ERR(segs)) { - kfree_skb(skb); - return -ENOMEM; - } - - consume_skb(skb); - - do { - struct sk_buff *nskb = segs->next; - int err; - - segs->next = NULL; - err = dst_output(segs); - - if (err && ret == 0) - ret = err; - segs = nskb; - } while (segs); - - return ret; -} static int ip_forward_finish(struct sk_buff *skb) { @@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); - if (ip_gso_exceeds_dst_mtu(skb)) - return ip_forward_finish_gso(skb); - return dst_output(skb); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index c10a3ce5cbff..15f0e2bad7ad 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -55,6 +55,7 @@ */ static int sysctl_ipfrag_max_dist __read_mostly = 64; +static const char ip_frag_cache_name[] = "ip4-frags"; struct ipfrag_skb_cb { @@ -86,11 +87,6 @@ static inline u8 ip4_frag_ecn(u8 tos) static struct inet_frags ip4_frags; -int ip_frag_nqueues(struct net *net) -{ - return net->ipv4.frags.nqueues; -} - int ip_frag_mem(struct net *net) { return sum_frag_mem_limit(&net->ipv4.frags); @@ -109,21 +105,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, - ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); + ip4_frags.rnd); } -static unsigned int ip4_hashfn(struct inet_frag_queue *q) +static unsigned int ip4_hashfn(const struct inet_frag_queue *q) { - struct ipq *ipq; + const struct ipq *ipq; ipq = container_of(q, struct ipq, q); return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); } -static bool ip4_frag_match(struct inet_frag_queue *q, void *a) +static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a) { - struct ipq *qp; - struct ip4_create_arg *arg = a; + const struct ipq *qp; + const struct ip4_create_arg *arg = a; qp = container_of(q, struct ipq, q); return qp->id == arg->iph->id && @@ -133,14 +129,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a) qp->user == arg->user; } -static void ip4_frag_init(struct inet_frag_queue *q, void *a) +static void ip4_frag_init(struct inet_frag_queue *q, const void *a) { struct ipq *qp = container_of(q, struct ipq, q); struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, frags); struct net *net = container_of(ipv4, struct net, ipv4); - struct ip4_create_arg *arg = a; + const struct ip4_create_arg *arg = a; qp->protocol = arg->iph->protocol; qp->id = arg->iph->id; @@ -177,18 +173,6 @@ static void ipq_kill(struct ipq *ipq) inet_frag_kill(&ipq->q, &ip4_frags); } -/* Memory limiting on fragments. Evictor trashes the oldest - * fragment queue until we are back under the threshold. - */ -static void ip_evictor(struct net *net) -{ - int evicted; - - evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false); - if (evicted) - IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); -} - /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ @@ -202,19 +186,22 @@ static void ip_expire(unsigned long arg) spin_lock(&qp->q.lock); - if (qp->q.last_in & INET_FRAG_COMPLETE) + if (qp->q.flags & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); - - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { + if (!(qp->q.flags & INET_FRAG_EVICTED)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + + if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) + goto out; + rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) @@ -227,16 +214,15 @@ static void ip_expire(unsigned long arg) if (err) goto out_rcu_unlock; - /* - * Only an end host needs to send an ICMP + /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (qp->user == IP_DEFRAG_AF_PACKET || - (qp->user == IP_DEFRAG_CONNTRACK_IN && - skb_rtable(head)->rt_type != RTN_LOCAL)) + ((qp->user >= IP_DEFRAG_CONNTRACK_IN) && + (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) && + (skb_rtable(head)->rt_type != RTN_LOCAL))) goto out_rcu_unlock; - /* Send an ICMP "Fragment Reassembly Timeout" message. */ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); out_rcu_unlock: @@ -259,7 +245,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; - read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); @@ -318,7 +303,7 @@ static int ip_frag_reinit(struct ipq *qp) } while (fp); sub_frag_mem_limit(&qp->q, sum_truesize); - qp->q.last_in = 0; + qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; qp->q.fragments = NULL; @@ -339,7 +324,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) int err = -ENOENT; u8 ecn; - if (qp->q.last_in & INET_FRAG_COMPLETE) + if (qp->q.flags & INET_FRAG_COMPLETE) goto err; if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && @@ -366,9 +351,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * or have different end, the segment is corrupted. */ if (end < qp->q.len || - ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) + ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len)) goto err; - qp->q.last_in |= INET_FRAG_LAST_IN; + qp->q.flags |= INET_FRAG_LAST_IN; qp->q.len = end; } else { if (end&7) { @@ -378,7 +363,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ - if (qp->q.last_in & INET_FRAG_LAST_IN) + if (qp->q.flags & INET_FRAG_LAST_IN) goto err; qp->q.len = end; } @@ -487,13 +472,13 @@ found: qp->ecn |= ecn; add_frag_mem_limit(&qp->q, skb->truesize); if (offset == 0) - qp->q.last_in |= INET_FRAG_FIRST_IN; + qp->q.flags |= INET_FRAG_FIRST_IN; if (ip_hdr(skb)->frag_off & htons(IP_DF) && skb->len + ihl > qp->q.max_size) qp->q.max_size = skb->len + ihl; - if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && qp->q.meat == qp->q.len) { unsigned long orefdst = skb->_skb_refdst; @@ -504,7 +489,6 @@ found: } skb_dst_drop(skb); - inet_frag_lru_move(&qp->q); return -EINPROGRESS; err: @@ -654,9 +638,6 @@ int ip_defrag(struct sk_buff *skb, u32 user) net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); - /* Start by cleaning up the memory. */ - ip_evictor(net); - /* Lookup (or create) queue header */ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { int ret; @@ -720,14 +701,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.ipv4.frags.low_thresh }, { .procname = "ipfrag_low_thresh", .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.ipv4.frags.high_thresh }, { .procname = "ipfrag_time", @@ -739,10 +723,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int ip4_frags_secret_interval_unused; static struct ctl_table ip4_frags_ctl_table[] = { { .procname = "ipfrag_secret_interval", - .data = &ip4_frags.secret_interval, + .data = &ip4_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -770,7 +756,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; + table[0].extra1 = &net->ipv4.frags.low_thresh; + table[0].extra2 = &init_net.ipv4.frags.high_thresh; table[1].data = &net->ipv4.frags.low_thresh; + table[1].extra2 = &net->ipv4.frags.high_thresh; table[2].data = &net->ipv4.frags.timeout; /* Don't export sysctls to unprivileged users */ @@ -872,6 +861,7 @@ void __init ipfrag_init(void) ip4_frags.qsize = sizeof(struct ipq); ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; - ip4_frags.secret_interval = 10 * 60 * HZ; - inet_frags_init(&ip4_frags); + ip4_frags.frags_cache_name = ip_frag_cache_name; + if (inet_frags_init(&ip4_frags)) + panic("IP: failed to allocate ip4_frags cache\n"); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 94213c891565..9b842544aea3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -410,7 +410,7 @@ static int ipgre_open(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_gre(dev_net(dev), &fl4, + rt = ip_route_output_gre(t->net, &fl4, t->parms.iph.daddr, t->parms.iph.saddr, t->parms.o_key, @@ -434,7 +434,7 @@ static int ipgre_close(struct net_device *dev) if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; - in_dev = inetdev_by_index(dev_net(dev), t->mlink); + in_dev = inetdev_by_index(t->net, t->mlink); if (in_dev) ip_mc_dec_group(in_dev, t->parms.iph.daddr); } @@ -478,7 +478,7 @@ static void __gre_tunnel_init(struct net_device *dev) dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; + dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { @@ -649,6 +649,7 @@ static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &gre_tap_netdev_ops; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip_tunnel_setup(dev, gre_tap_net_id); } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index f4ab72e19af9..ad382499bace 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -288,6 +288,10 @@ int ip_options_compile(struct net *net, optptr++; continue; } + if (unlikely(l < 2)) { + pp_ptr = optptr; + goto error; + } optlen = optptr[1]; if (optlen < 2 || optlen > l) { pp_ptr = optptr; @@ -364,7 +368,7 @@ int ip_options_compile(struct net *net, } if (optptr[2] <= optlen) { unsigned char *timeptr = NULL; - if (optptr[2]+3 > optptr[1]) { + if (optptr[2]+3 > optlen) { pp_ptr = optptr + 2; goto error; } @@ -376,7 +380,7 @@ int ip_options_compile(struct net *net, optptr[2] += 4; break; case IPOPT_TS_TSANDADDR: - if (optptr[2]+7 > optptr[1]) { + if (optptr[2]+7 > optlen) { pp_ptr = optptr + 2; goto error; } @@ -390,7 +394,7 @@ int ip_options_compile(struct net *net, optptr[2] += 8; break; case IPOPT_TS_PRESPEC: - if (optptr[2]+7 > optptr[1]) { + if (optptr[2]+7 > optlen) { pp_ptr = optptr + 2; goto error; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1cbeba5edff9..215af2b155cb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(skb, &rt->dst, sk); + ip_select_ident(skb, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } +static int ip_finish_output_gso(struct sk_buff *skb) +{ + netdev_features_t features; + struct sk_buff *segs; + int ret = 0; + + /* common case: locally created skb or seglen is <= mtu */ + if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || + skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) + return ip_finish_output2(skb); + + /* Slowpath - GSO segment length is exceeding the dst MTU. + * + * This can happen in two cases: + * 1) TCP GRO packet, DF bit not set + * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly + * from host network stack. + */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = ip_fragment(segs, ip_finish_output2); + + if (err && ret == 0) + ret = err; + segs = nskb; + } while (segs); + + return ret; +} + static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) @@ -220,10 +262,13 @@ static int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) + if (skb_is_gso(skb)) + return ip_finish_output_gso(skb); + + if (skb->len > ip_skb_dst_mtu(skb)) return ip_fragment(skb, ip_finish_output2); - else - return ip_finish_output2(skb); + + return ip_finish_output2(skb); } int ip_mc_output(struct sock *sk, struct sk_buff *skb) @@ -370,7 +415,7 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) + if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; @@ -385,8 +430,7 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(skb, &rt->dst, sk, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); + ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; @@ -456,7 +500,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); mtu = ip_skb_dst_mtu(skb); - if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || + if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); @@ -811,17 +855,21 @@ static int __ip_append_data(struct sock *sk, unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; + u32 tskey = 0; skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; mtu = cork->fragsize; + if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && + sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + tskey = sk->sk_tskey++; hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; + maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -918,10 +966,6 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; - else - /* only the initial fragment is - time stamped */ - cork->tx_flags = 0; } if (skb == NULL) goto error; @@ -932,7 +976,12 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); + + /* only the initial fragment is time stamped */ skb_shinfo(skb)->tx_flags = cork->tx_flags; + cork->tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; /* * Find where to start putting bytes. @@ -1144,7 +1193,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu; + maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu; if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -1305,10 +1354,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - skb->local_df = ip_sk_local_df(sk); + skb->ignore_df = ip_sk_ignore_df(sk); /* DF bit is set when we want to see DF on outgoing frames. - * If local_df is set too, we still allow to fragment this frame + * If ignore_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc == IP_PMTUDISC_DO || inet->pmtudisc == IP_PMTUDISC_PROBE || @@ -1334,7 +1383,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(skb, &rt->dst, sk); + ip_select_ident(skb, sk); if (opt) { iph->ihl += opt->optlen>>2; @@ -1501,7 +1550,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, daddr = replyopts.opt.opt.faddr; } - flowi4_init_output(&fl4, arg->bound_dev_if, 0, + flowi4_init_output(&fl4, arg->bound_dev_if, + IP4_REPLY_MARK(net, skb->mark), RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 64741b938632..5cb830c78990 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1319,7 +1319,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_type != SOCK_STREAM) return -ENOPROTOOPT; - msg.msg_control = optval; + msg.msg_control = (__force void *) optval; msg.msg_controllen = len; msg.msg_flags = flags; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fa5b7519765f..afed1aac2638 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -69,28 +69,25 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) } static void __tunnel_dst_set(struct ip_tunnel_dst *idst, - struct dst_entry *dst) + struct dst_entry *dst, __be32 saddr) { struct dst_entry *old_dst; - if (dst) { - if (dst->flags & DST_NOCACHE) - dst = NULL; - else - dst_clone(dst); - } + dst_clone(dst); old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); dst_release(old_dst); + idst->saddr = saddr; } -static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst) +static void tunnel_dst_set(struct ip_tunnel *t, + struct dst_entry *dst, __be32 saddr) { - __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst); + __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst, saddr); } static void tunnel_dst_reset(struct ip_tunnel *t) { - tunnel_dst_set(t, NULL); + tunnel_dst_set(t, NULL, 0); } void ip_tunnel_dst_reset_all(struct ip_tunnel *t) @@ -98,23 +95,29 @@ void ip_tunnel_dst_reset_all(struct ip_tunnel *t) int i; for_each_possible_cpu(i) - __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); + __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0); } EXPORT_SYMBOL(ip_tunnel_dst_reset_all); -static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) +static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, + u32 cookie, __be32 *saddr) { + struct ip_tunnel_dst *idst; struct dst_entry *dst; rcu_read_lock(); - dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); + idst = this_cpu_ptr(t->dst_cache); + dst = rcu_dereference(idst->dst); + if (dst && !atomic_inc_not_zero(&dst->__refcnt)) + dst = NULL; if (dst) { - if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { - rcu_read_unlock(); + if (!dst->obsolete || dst->ops->check(dst, cookie)) { + *saddr = idst->saddr; + } else { tunnel_dst_reset(t); - return NULL; + dst_release(dst); + dst = NULL; } - dst_hold(dst); } rcu_read_unlock(); return (struct rtable *)dst; @@ -173,6 +176,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (remote != t->parms.iph.daddr || + t->parms.iph.saddr != 0 || !(t->dev->flags & IFF_UP)) continue; @@ -189,10 +193,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, head = &itn->tunnels[hash]; hlist_for_each_entry_rcu(t, head, hash_node) { - if ((local != t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) + if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && + (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) + continue; + + if (!(t->dev->flags & IFF_UP)) continue; if (!ip_tunnel_key_match(&t->parms, flags, key)) @@ -209,6 +214,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (t->parms.i_key != key || + t->parms.iph.saddr != 0 || + t->parms.iph.daddr != 0 || !(t->dev->flags & IFF_UP)) continue; @@ -268,6 +275,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; __be32 key = parms->i_key; + __be16 flags = parms->i_flags; int link = parms->link; struct ip_tunnel *t = NULL; struct hlist_head *head = ip_bucket(itn, parms); @@ -275,9 +283,9 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - key == t->parms.i_key && link == t->parms.link && - type == t->dev->type) + type == t->dev->type && + ip_tunnel_key_match(&t->parms, flags, key)) break; } return t; @@ -304,7 +312,7 @@ static struct net_device *__ip_tunnel_create(struct net *net, } ASSERT_RTNL(); - dev = alloc_netdev(ops->priv_size, name, ops->setup); + dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); if (!dev) { err = -ENOMEM; goto failed; @@ -366,7 +374,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) if (!IS_ERR(rt)) { tdev = rt->dst.dev; - tunnel_dst_set(tunnel, &rt->dst); + tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@ -395,11 +403,10 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, struct ip_tunnel_net *itn, struct ip_tunnel_parm *parms) { - struct ip_tunnel *nt, *fbt; + struct ip_tunnel *nt; struct net_device *dev; BUG_ON(!itn->fb_tunnel_dev); - fbt = netdev_priv(itn->fb_tunnel_dev); dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); if (IS_ERR(dev)) return ERR_CAST(dev); @@ -442,6 +449,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -538,9 +547,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; - bool connected = true; + bool connected; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + connected = (tunnel->parms.iph.daddr != 0); dst = tnl_params->daddr; if (dst == 0) { @@ -607,7 +617,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); - rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL; + rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; if (!rt) { rt = ip_route_output_key(tunnel->net, &fl4); @@ -617,7 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } if (connected) - tunnel_dst_set(tunnel, &rt->dst); + tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); } if (rt->dst.dev == dev) { @@ -665,6 +675,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { + ip_rt_put(rt); dev->stats.tx_dropped++; kfree_skb(skb); return; @@ -744,19 +755,19 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) goto done; if (p->iph.ttl) p->iph.frag_off |= htons(IP_DF); - if (!(p->i_flags&TUNNEL_KEY)) - p->i_key = 0; - if (!(p->o_flags&TUNNEL_KEY)) - p->o_key = 0; + if (!(p->i_flags & VTI_ISVTI)) { + if (!(p->i_flags & TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags & TUNNEL_KEY)) + p->o_key = 0; + } t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); if (!t && (cmd == SIOCADDTUNNEL)) { t = ip_tunnel_create(net, itn, p); - if (IS_ERR(t)) { - err = PTR_ERR(t); - break; - } + err = PTR_ERR_OR_ZERO(t); + break; } if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { if (t != NULL) { @@ -880,6 +891,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, */ if (!IS_ERR(itn->fb_tunnel_dev)) { itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); } rtnl_unlock(); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index bcf206c79005..f4c987bb7e94 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) @@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, return skb; } + /* If packet is not gso and we are resolving any partial checksum, + * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL + * on the outer header without confusing devices that implement + * NETIF_F_IP_CSUM with encapsulation. + */ + if (csum_help) + skb->encapsulation = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { err = skb_checksum_help(skb); if (unlikely(err)) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index afcee51b90ed..e453cb724a95 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -239,6 +239,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) static int vti4_err(struct sk_buff *skb, u32 info) { __be32 spi; + __u32 mark; struct xfrm_state *x; struct ip_tunnel *tunnel; struct ip_esp_hdr *esph; @@ -254,6 +255,8 @@ static int vti4_err(struct sk_buff *skb, u32 info) if (!tunnel) return -1; + mark = be32_to_cpu(tunnel->parms.o_key); + switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); @@ -281,7 +284,7 @@ static int vti4_err(struct sk_buff *skb, u32 info) return 0; } - x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET); if (!x) return 0; @@ -310,7 +313,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EINVAL; } - p.i_flags |= VTI_ISVTI; + if (!(p.i_flags & GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags & GRE_KEY)) + p.o_key = 0; + + p.i_flags = VTI_ISVTI; + err = ip_tunnel_ioctl(dev, &p, cmd); if (err) return err; @@ -525,40 +534,28 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { static int __init vti_init(void) { + const char *msg; int err; - pr_info("IPv4 over IPSec tunneling driver\n"); + pr_info("IPv4 over IPsec tunneling driver\n"); + msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) - return err; - err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP); - if (err < 0) { - unregister_pernet_device(&vti_net_ops); - pr_info("vti init: can't register tunnel\n"); - - return err; - } + goto pernet_dev_failed; + msg = "tunnel protocols"; + err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP); + if (err < 0) + goto xfrm_proto_esp_failed; err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH); - if (err < 0) { - xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); - unregister_pernet_device(&vti_net_ops); - pr_info("vti init: can't register tunnel\n"); - - return err; - } - + if (err < 0) + goto xfrm_proto_ah_failed; err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP); - if (err < 0) { - xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); - xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); - unregister_pernet_device(&vti_net_ops); - pr_info("vti init: can't register tunnel\n"); - - return err; - } + if (err < 0) + goto xfrm_proto_comp_failed; + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) goto rtnl_link_failed; @@ -567,23 +564,23 @@ static int __init vti_init(void) rtnl_link_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); +xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); +xfrm_proto_ah_failed: xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); +xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); +pernet_dev_failed: + pr_err("vti init: failed to register %s\n", msg); return err; } static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); - if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP)) - pr_info("vti close: can't deregister tunnel\n"); - if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH)) - pr_info("vti close: can't deregister tunnel\n"); - if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP)) - pr_info("vti close: can't deregister tunnel\n"); - - + xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); + xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b3e86ea7b71b..5bbef4fdcb43 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -143,8 +143,6 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ -__be32 ic_dev_xid; /* Device under configuration */ - /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -654,6 +652,7 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, }; +static __be32 ic_dev_xid; /* Device under configuration */ /* * Initialize DHCP/BOOTP extension fields in the request. @@ -1218,10 +1217,10 @@ static int __init ic_dynamic(void) get_random_bytes(&timeout, sizeof(timeout)); timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { +#ifdef IPCONFIG_BOOTP /* Track the device we are configuring */ ic_dev_xid = d->xid; -#ifdef IPCONFIG_BOOTP if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); #endif diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 812b18351462..62eaa005e146 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPIP, 0); + t->parms.link, 0, IPPROTO_IPIP, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPIP, 0); err = 0; goto out; @@ -486,4 +486,5 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("ipip"); MODULE_ALIAS_NETDEV("tunl0"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d84dc8d4c916..c8034587859d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -484,7 +484,7 @@ static void reg_vif_setup(struct net_device *dev) dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; - dev->netdev_ops = ®_vif_netdev_ops, + dev->netdev_ops = ®_vif_netdev_ops; dev->destructor = free_netdev; dev->features |= NETIF_F_NETNS_LOCAL; } @@ -500,7 +500,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) else sprintf(name, "pimreg%u", mrt->id); - dev = alloc_netdev(0, name, reg_vif_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (dev == NULL) return NULL; @@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(skb, skb_dst(skb), NULL); + ip_select_ident(skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index a26ce035e3fa..fb173126f03d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,6 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. +config NF_LOG_ARP + tristate "ARP packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + +config NF_LOG_IPV4 + tristate "IPv4 packet logging" + default m if NETFILTER_ADVANCED=n + select NF_LOG_COMMON + config NF_TABLES_IPV4 depends on NF_TABLES tristate "IPv4 nf_tables support" @@ -159,25 +169,6 @@ config IP_NF_TARGET_SYNPROXY To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_ULOG - tristate "ULOG target support (obsolete)" - default m if NETFILTER_ADVANCED=n - ---help--- - - This option enables the old IPv4-only "ipt_ULOG" implementation - which has been obsoleted by the new "nfnetlink_log" code (see - CONFIG_NETFILTER_NETLINK_LOG). - - This option adds a `ULOG' target, which allows you to create rules in - any iptables table. The packet is passed to a userspace logging - daemon using netlink multicast sockets; unlike the LOG target - which can only be viewed through syslog. - - The appropriate userspace logging daemon (ulogd) may be obtained from - <http://www.netfilter.org/projects/ulogd/index.html> - - To compile it as a module, choose M here. If unsure, say N. - # NAT + specific targets: nf_conntrack config NF_NAT_IPV4 tristate "IPv4 NAT" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 90b82405331e..33001621465b 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -19,6 +19,10 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o +# logging +obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o +obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o @@ -53,7 +57,6 @@ obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o -obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c deleted file mode 100644 index 9cb993cd224b..000000000000 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ /dev/null @@ -1,498 +0,0 @@ -/* - * netfilter module for userspace packet logging daemons - * - * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * (C) 2005-2007 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This module accepts two parameters: - * - * nlbufsiz: - * The parameter specifies how big the buffer for each netlink multicast - * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will - * get accumulated in the kernel until they are sent to userspace. It is - * NOT possible to allocate more than 128kB, and it is strongly discouraged, - * because atomically allocating 128kB inside the network rx softirq is not - * reliable. Please also keep in mind that this buffer size is allocated for - * each nlgroup you are using, so the total kernel memory usage increases - * by that factor. - * - * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since - * nlbufsiz is used with alloc_skb, which adds another - * sizeof(struct skb_shared_info). Use NLMSG_GOODSIZE instead. - * - * flushtimeout: - * Specify, after how many hundredths of a second the queue should be - * flushed even if it is not full yet. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/socket.h> -#include <linux/slab.h> -#include <linux/skbuff.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <net/netlink.h> -#include <linux/netdevice.h> -#include <linux/mm.h> -#include <linux/moduleparam.h> -#include <linux/netfilter.h> -#include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv4/ipt_ULOG.h> -#include <net/netfilter/nf_log.h> -#include <net/netns/generic.h> -#include <net/sock.h> -#include <linux/bitops.h> -#include <asm/unaligned.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); -MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); - -#define ULOG_NL_EVENT 111 /* Harald's favorite number */ -#define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ - -static unsigned int nlbufsiz = NLMSG_GOODSIZE; -module_param(nlbufsiz, uint, 0400); -MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); - -static unsigned int flushtimeout = 10; -module_param(flushtimeout, uint, 0600); -MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); - -static bool nflog = true; -module_param(nflog, bool, 0400); -MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); - -/* global data structures */ - -typedef struct { - unsigned int qlen; /* number of nlmsgs' in the skb */ - struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ - struct sk_buff *skb; /* the pre-allocated skb */ - struct timer_list timer; /* the timer function */ -} ulog_buff_t; - -static int ulog_net_id __read_mostly; -struct ulog_net { - unsigned int nlgroup[ULOG_MAXNLGROUPS]; - ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; - struct sock *nflognl; - spinlock_t lock; -}; - -static struct ulog_net *ulog_pernet(struct net *net) -{ - return net_generic(net, ulog_net_id); -} - -/* send one ulog_buff_t to userspace */ -static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum) -{ - ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum]; - - pr_debug("ulog_send: timer is deleting\n"); - del_timer(&ub->timer); - - if (!ub->skb) { - pr_debug("ulog_send: nothing to send\n"); - return; - } - - /* last nlmsg needs NLMSG_DONE */ - if (ub->qlen > 1) - ub->lastnlh->nlmsg_type = NLMSG_DONE; - - NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; - pr_debug("throwing %d packets to netlink group %u\n", - ub->qlen, nlgroupnum + 1); - netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1, - GFP_ATOMIC); - - ub->qlen = 0; - ub->skb = NULL; - ub->lastnlh = NULL; -} - - -/* timer function to flush queue in flushtimeout time */ -static void ulog_timer(unsigned long data) -{ - unsigned int groupnum = *((unsigned int *)data); - struct ulog_net *ulog = container_of((void *)data, - struct ulog_net, - nlgroup[groupnum]); - pr_debug("timer function called, calling ulog_send\n"); - - /* lock to protect against somebody modifying our structure - * from ipt_ulog_target at the same time */ - spin_lock_bh(&ulog->lock); - ulog_send(ulog, groupnum); - spin_unlock_bh(&ulog->lock); -} - -static struct sk_buff *ulog_alloc_skb(unsigned int size) -{ - struct sk_buff *skb; - unsigned int n; - - /* alloc skb which should be big enough for a whole - * multipart message. WARNING: has to be <= 131000 - * due to slab allocator restrictions */ - - n = max(size, nlbufsiz); - skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); - if (!skb) { - if (n > size) { - /* try to allocate only as much as we need for - * current packet */ - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - pr_debug("cannot even allocate %ub\n", size); - } - } - - return skb; -} - -static void ipt_ulog_packet(struct net *net, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct ipt_ulog_info *loginfo, - const char *prefix) -{ - ulog_buff_t *ub; - ulog_packet_msg_t *pm; - size_t size, copy_len; - struct nlmsghdr *nlh; - struct timeval tv; - struct ulog_net *ulog = ulog_pernet(net); - - /* ffs == find first bit set, necessary because userspace - * is already shifting groupnumber, but we need unshifted. - * ffs() returns [1..32], we need [0..31] */ - unsigned int groupnum = ffs(loginfo->nl_group) - 1; - - /* calculate the size of the skb needed */ - if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len) - copy_len = skb->len; - else - copy_len = loginfo->copy_range; - - size = nlmsg_total_size(sizeof(*pm) + copy_len); - - ub = &ulog->ulog_buffers[groupnum]; - - spin_lock_bh(&ulog->lock); - - if (!ub->skb) { - if (!(ub->skb = ulog_alloc_skb(size))) - goto alloc_failure; - } else if (ub->qlen >= loginfo->qthreshold || - size > skb_tailroom(ub->skb)) { - /* either the queue len is too high or we don't have - * enough room in nlskb left. send it to userspace. */ - - ulog_send(ulog, groupnum); - - if (!(ub->skb = ulog_alloc_skb(size))) - goto alloc_failure; - } - - pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); - - nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, - sizeof(*pm)+copy_len, 0); - if (!nlh) { - pr_debug("error during nlmsg_put\n"); - goto out_unlock; - } - ub->qlen++; - - pm = nlmsg_data(nlh); - memset(pm, 0, sizeof(*pm)); - - /* We might not have a timestamp, get one */ - if (skb->tstamp.tv64 == 0) - __net_timestamp((struct sk_buff *)skb); - - /* copy hook, prefix, timestamp, payload, etc. */ - pm->data_len = copy_len; - tv = ktime_to_timeval(skb->tstamp); - put_unaligned(tv.tv_sec, &pm->timestamp_sec); - put_unaligned(tv.tv_usec, &pm->timestamp_usec); - put_unaligned(skb->mark, &pm->mark); - pm->hook = hooknum; - if (prefix != NULL) { - strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1); - pm->prefix[sizeof(pm->prefix) - 1] = '\0'; - } - else if (loginfo->prefix[0] != '\0') - strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); - - if (in && in->hard_header_len > 0 && - skb->mac_header != skb->network_header && - in->hard_header_len <= ULOG_MAC_LEN) { - memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); - pm->mac_len = in->hard_header_len; - } else - pm->mac_len = 0; - - if (in) - strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); - - if (out) - strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); - - /* copy_len <= skb->len, so can't fail. */ - if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) - BUG(); - - /* check if we are building multi-part messages */ - if (ub->qlen > 1) - ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; - - ub->lastnlh = nlh; - - /* if timer isn't already running, start it */ - if (!timer_pending(&ub->timer)) { - ub->timer.expires = jiffies + flushtimeout * HZ / 100; - add_timer(&ub->timer); - } - - /* if threshold is reached, send message to userspace */ - if (ub->qlen >= loginfo->qthreshold) { - if (loginfo->qthreshold > 1) - nlh->nlmsg_type = NLMSG_DONE; - ulog_send(ulog, groupnum); - } -out_unlock: - spin_unlock_bh(&ulog->lock); - - return; - -alloc_failure: - pr_debug("Error building netlink message\n"); - spin_unlock_bh(&ulog->lock); -} - -static unsigned int -ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - - ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out, - par->targinfo, NULL); - return XT_CONTINUE; -} - -static void ipt_logfn(struct net *net, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *li, - const char *prefix) -{ - struct ipt_ulog_info loginfo; - - if (!li || li->type != NF_LOG_TYPE_ULOG) { - loginfo.nl_group = ULOG_DEFAULT_NLGROUP; - loginfo.copy_range = 0; - loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD; - loginfo.prefix[0] = '\0'; - } else { - loginfo.nl_group = li->u.ulog.group; - loginfo.copy_range = li->u.ulog.copy_len; - loginfo.qthreshold = li->u.ulog.qthreshold; - strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); - } - - ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); -} - -static int ulog_tg_check(const struct xt_tgchk_param *par) -{ - const struct ipt_ulog_info *loginfo = par->targinfo; - - if (!par->net->xt.ulog_warn_deprecated) { - pr_info("ULOG is deprecated and it will be removed soon, " - "use NFLOG instead\n"); - par->net->xt.ulog_warn_deprecated = true; - } - - if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { - pr_debug("prefix not null-terminated\n"); - return -EINVAL; - } - if (loginfo->qthreshold > ULOG_MAX_QLEN) { - pr_debug("queue threshold %Zu > MAX_QLEN\n", - loginfo->qthreshold); - return -EINVAL; - } - return 0; -} - -#ifdef CONFIG_COMPAT -struct compat_ipt_ulog_info { - compat_uint_t nl_group; - compat_size_t copy_range; - compat_size_t qthreshold; - char prefix[ULOG_PREFIX_LEN]; -}; - -static void ulog_tg_compat_from_user(void *dst, const void *src) -{ - const struct compat_ipt_ulog_info *cl = src; - struct ipt_ulog_info l = { - .nl_group = cl->nl_group, - .copy_range = cl->copy_range, - .qthreshold = cl->qthreshold, - }; - - memcpy(l.prefix, cl->prefix, sizeof(l.prefix)); - memcpy(dst, &l, sizeof(l)); -} - -static int ulog_tg_compat_to_user(void __user *dst, const void *src) -{ - const struct ipt_ulog_info *l = src; - struct compat_ipt_ulog_info cl = { - .nl_group = l->nl_group, - .copy_range = l->copy_range, - .qthreshold = l->qthreshold, - }; - - memcpy(cl.prefix, l->prefix, sizeof(cl.prefix)); - return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target ulog_tg_reg __read_mostly = { - .name = "ULOG", - .family = NFPROTO_IPV4, - .target = ulog_tg, - .targetsize = sizeof(struct ipt_ulog_info), - .checkentry = ulog_tg_check, -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_ipt_ulog_info), - .compat_from_user = ulog_tg_compat_from_user, - .compat_to_user = ulog_tg_compat_to_user, -#endif - .me = THIS_MODULE, -}; - -static struct nf_logger ipt_ulog_logger __read_mostly = { - .name = "ipt_ULOG", - .logfn = ipt_logfn, - .me = THIS_MODULE, -}; - -static int __net_init ulog_tg_net_init(struct net *net) -{ - int i; - struct ulog_net *ulog = ulog_pernet(net); - struct netlink_kernel_cfg cfg = { - .groups = ULOG_MAXNLGROUPS, - }; - - spin_lock_init(&ulog->lock); - /* initialize ulog_buffers */ - for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - ulog->nlgroup[i] = i; - setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, - (unsigned long)&ulog->nlgroup[i]); - } - - ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); - if (!ulog->nflognl) - return -ENOMEM; - - if (nflog) - nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger); - - return 0; -} - -static void __net_exit ulog_tg_net_exit(struct net *net) -{ - ulog_buff_t *ub; - int i; - struct ulog_net *ulog = ulog_pernet(net); - - if (nflog) - nf_log_unset(net, &ipt_ulog_logger); - - netlink_kernel_release(ulog->nflognl); - - /* remove pending timers and free allocated skb's */ - for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - ub = &ulog->ulog_buffers[i]; - pr_debug("timer is deleting\n"); - del_timer(&ub->timer); - - if (ub->skb) { - kfree_skb(ub->skb); - ub->skb = NULL; - } - } -} - -static struct pernet_operations ulog_tg_net_ops = { - .init = ulog_tg_net_init, - .exit = ulog_tg_net_exit, - .id = &ulog_net_id, - .size = sizeof(struct ulog_net), -}; - -static int __init ulog_tg_init(void) -{ - int ret; - pr_debug("init module\n"); - - if (nlbufsiz > 128*1024) { - pr_warn("Netlink buffer has to be <= 128kB\n"); - return -EINVAL; - } - - ret = register_pernet_subsys(&ulog_tg_net_ops); - if (ret) - goto out_pernet; - - ret = xt_register_target(&ulog_tg_reg); - if (ret < 0) - goto out_target; - - if (nflog) - nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); - - return 0; - -out_target: - unregister_pernet_subsys(&ulog_tg_net_ops); -out_pernet: - return ret; -} - -static void __exit ulog_tg_exit(void) -{ - pr_debug("cleanup_module\n"); - if (nflog) - nf_log_unregister(&ipt_ulog_logger); - xt_unregister_target(&ulog_tg_reg); - unregister_pernet_subsys(&ulog_tg_net_ops); -} - -module_init(ulog_tg_init); -module_exit(ulog_tg_exit); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ee2886126e3d..f1787c04a4dd 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -91,17 +91,9 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, if (nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8127dc802865..a054fe083431 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -314,7 +314,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -ENOENT; } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -358,7 +358,7 @@ static struct nf_sockopt_ops so_getorigdst = { .pf = PF_INET, .get_optmin = SO_ORIGINAL_DST, .get_optmax = SO_ORIGINAL_DST+1, - .get = &getorigdst, + .get = getorigdst, .owner = THIS_MODULE, }; @@ -388,7 +388,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .invert_tuple = ipv4_invert_tuple, .print_tuple = ipv4_print_tuple, .get_l4proto = ipv4_get_l4proto, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv4_tuple_to_nlattr, .nlattr_tuple_size = ipv4_nlattr_tuple_size, .nlattr_to_tuple = ipv4_nlattr_to_tuple, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index a338dad41b7d..b91b2641adda 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -226,7 +226,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -408,7 +408,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .error = icmp_error, .destroy = NULL, .me = NULL, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmp_tuple_to_nlattr, .nlattr_tuple_size = icmp_nlattr_tuple_size, .nlattr_to_tuple = icmp_nlattr_to_tuple, diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 12e13bd82b5b..76bd1aef257f 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -17,12 +17,11 @@ #include <linux/netfilter_bridge.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif #include <net/netfilter/nf_conntrack_zones.h> -/* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { int err; @@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) err = ip_defrag(skb, user); local_bh_enable(); - if (!err) + if (!err) { ip_send_check(ip_hdr(skb)); + skb->ignore_df = 1; + } return err; } @@ -44,7 +45,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif @@ -73,8 +74,8 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, inet->nodefrag) return NF_ACCEPT; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#if !IS_ENABLED(CONFIG_NF_NAT) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c new file mode 100644 index 000000000000..ccfc78db12ee --- /dev/null +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -0,0 +1,149 @@ +/* + * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * Based on code from ebt_log from: + * + * Bart De Schuymer <bdschuym@pandora.be> + * Harald Welte <laforge@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/xt_LOG.h> +#include <net/netfilter/nf_log.h> + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 5, + .logflags = NF_LOG_MASK, + }, + }, +}; + +struct arppayload { + unsigned char mac_src[ETH_ALEN]; + unsigned char ip_src[4]; + unsigned char mac_dst[ETH_ALEN]; + unsigned char ip_dst[4]; +}; + +static void dump_arp_packet(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int nhoff) +{ + const struct arphdr *ah; + struct arphdr _arph; + const struct arppayload *ap; + struct arppayload _arpp; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d", + ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op)); + + /* If it's for Ethernet and the lengths are OK, then log the ARP + * payload. + */ + if (ah->ar_hrd != htons(1) || + ah->ar_hln != ETH_ALEN || + ah->ar_pln != sizeof(__be32)) + return; + + ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); + if (ap == NULL) { + nf_log_buf_add(m, " INCOMPLETE [%Zu bytes]", + skb->len - sizeof(_arph)); + return; + } + nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4", + ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); +} + +void nf_log_arp_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, + prefix); + dump_arp_packet(m, loginfo, skb, 0); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_arp_logger __read_mostly = { + .name = "nf_log_arp", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_arp_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_arp_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); + return 0; +} + +static void __net_exit nf_log_arp_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_arp_logger); +} + +static struct pernet_operations nf_log_arp_net_ops = { + .init = nf_log_arp_net_init, + .exit = nf_log_arp_net_exit, +}; + +static int __init nf_log_arp_init(void) +{ + int ret; + + ret = register_pernet_subsys(&nf_log_arp_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_ARP, &nf_arp_logger); + return 0; +} + +static void __exit nf_log_arp_exit(void) +{ + unregister_pernet_subsys(&nf_log_arp_net_ops); + nf_log_unregister(&nf_arp_logger); +} + +module_init(nf_log_arp_init); +module_exit(nf_log_arp_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter ARP packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(3, 0); diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c new file mode 100644 index 000000000000..078bdca1b607 --- /dev/null +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -0,0 +1,385 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/xt_LOG.h> +#include <net/netfilter/nf_log.h> + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 5, + .logflags = NF_LOG_MASK, + }, + }, +}; + +/* One level of recursion won't kill us */ +static void dump_ipv4_packet(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int iphoff) +{ + struct iphdr _iph; + const struct iphdr *ih; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; + + ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); + if (ih == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Important fields: + * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ + /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ + nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr); + + /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ + nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", + ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, + ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); + + /* Max length: 6 "CE DF MF " */ + if (ntohs(ih->frag_off) & IP_CE) + nf_log_buf_add(m, "CE "); + if (ntohs(ih->frag_off) & IP_DF) + nf_log_buf_add(m, "DF "); + if (ntohs(ih->frag_off) & IP_MF) + nf_log_buf_add(m, "MF "); + + /* Max length: 11 "FRAG:65535 " */ + if (ntohs(ih->frag_off) & IP_OFFSET) + nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); + + if ((logflags & XT_LOG_IPOPT) && + ih->ihl * 4 > sizeof(struct iphdr)) { + const unsigned char *op; + unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; + unsigned int i, optsize; + + optsize = ih->ihl * 4 - sizeof(struct iphdr); + op = skb_header_pointer(skb, iphoff+sizeof(_iph), + optsize, _opt); + if (op == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + nf_log_buf_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + nf_log_buf_add(m, "%02X", op[i]); + nf_log_buf_add(m, ") "); + } + + switch (ih->protocol) { + case IPPROTO_TCP: + if (nf_log_dump_tcp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff+ih->ihl*4, logflags)) + return; + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (nf_log_dump_udp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff+ih->ihl*4)) + return; + break; + case IPPROTO_ICMP: { + struct icmphdr _icmph; + const struct icmphdr *ich; + static const size_t required_len[NR_ICMP_TYPES+1] + = { [ICMP_ECHOREPLY] = 4, + [ICMP_DEST_UNREACH] + = 8 + sizeof(struct iphdr), + [ICMP_SOURCE_QUENCH] + = 8 + sizeof(struct iphdr), + [ICMP_REDIRECT] + = 8 + sizeof(struct iphdr), + [ICMP_ECHO] = 4, + [ICMP_TIME_EXCEEDED] + = 8 + sizeof(struct iphdr), + [ICMP_PARAMETERPROB] + = 8 + sizeof(struct iphdr), + [ICMP_TIMESTAMP] = 20, + [ICMP_TIMESTAMPREPLY] = 20, + [ICMP_ADDRESS] = 12, + [ICMP_ADDRESSREPLY] = 12 }; + + /* Max length: 11 "PROTO=ICMP " */ + nf_log_buf_add(m, "PROTO=ICMP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_icmph), &_icmph); + if (ich == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (ich->type <= NR_ICMP_TYPES && + required_len[ich->type] && + skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + switch (ich->type) { + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + nf_log_buf_add(m, "ID=%u SEQ=%u ", + ntohs(ich->un.echo.id), + ntohs(ich->un.echo.sequence)); + break; + + case ICMP_PARAMETERPROB: + /* Max length: 14 "PARAMETER=255 " */ + nf_log_buf_add(m, "PARAMETER=%u ", + ntohl(ich->un.gateway) >> 24); + break; + case ICMP_REDIRECT: + /* Max length: 24 "GATEWAY=255.255.255.255 " */ + nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); + /* Fall through */ + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + /* Max length: 3+maxlen */ + if (!iphoff) { /* Only recurse once. */ + nf_log_buf_add(m, "["); + dump_ipv4_packet(m, info, skb, + iphoff + ih->ihl*4+sizeof(_icmph)); + nf_log_buf_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ich->type == ICMP_DEST_UNREACH && + ich->code == ICMP_FRAG_NEEDED) { + nf_log_buf_add(m, "MTU=%u ", + ntohs(ich->un.frag.mtu)); + } + } + break; + } + /* Max Length */ + case IPPROTO_AH: { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 9 "PROTO=AH " */ + nf_log_buf_add(m, "PROTO=AH "); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ah = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_ahdr), &_ahdr); + if (ah == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); + break; + } + case IPPROTO_ESP: { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 10 "PROTO=ESP " */ + nf_log_buf_add(m, "PROTO=ESP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + eh = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_esph), &_esph); + if (eh == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi)); + break; + } + /* Max length: 10 "PROTO 255 " */ + default: + nf_log_buf_add(m, "PROTO=%u ", ih->protocol); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & XT_LOG_UID) && !iphoff) + nf_log_dump_sk_uid_gid(m, skb->sk); + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (!iphoff && skb->mark) + nf_log_buf_add(m, "MARK=0x%x ", skb->mark); + + /* Proto Max log string length */ + /* IP: 40+46+6+11+127 = 230 */ + /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ + /* UDP: 10+max(25,20) = 35 */ + /* UDPLITE: 14+max(25,20) = 39 */ + /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ + /* ESP: 10+max(25)+15 = 50 */ + /* AH: 9+max(25)+15 = 49 */ + /* unknown: 10 */ + + /* (ICMP allows recursion one level deep) */ + /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ + /* maxlen = 230+ 91 + 230 + 252 = 803 */ +} + +static void dump_ipv4_mac_header(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & XT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + nf_log_buf_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int i; + + nf_log_buf_add(m, "%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++, p++) + nf_log_buf_add(m, ":%02x", *p); + } + nf_log_buf_add(m, " "); +} + +static void nf_log_ip_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, + out, loginfo, prefix); + + if (in != NULL) + dump_ipv4_mac_header(m, loginfo, skb); + + dump_ipv4_packet(m, loginfo, skb, 0); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_ip_logger __read_mostly = { + .name = "nf_log_ipv4", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_ip_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_ipv4_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); + return 0; +} + +static void __net_exit nf_log_ipv4_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_ip_logger); +} + +static struct pernet_operations nf_log_ipv4_net_ops = { + .init = nf_log_ipv4_net_init, + .exit = nf_log_ipv4_net_exit, +}; + +static int __init nf_log_ipv4_init(void) +{ + int ret; + + ret = register_pernet_subsys(&nf_log_ipv4_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_IPV4, &nf_ip_logger); + return 0; +} + +static void __exit nf_log_ipv4_exit(void) +{ + unregister_pernet_subsys(&nf_log_ipv4_net_ops); + nf_log_unregister(&nf_ip_logger); +} + +module_init(nf_log_ipv4_init); +module_exit(nf_log_ipv4_exit); + +MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(AF_INET, 0); diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index d8b2e14efddc..14f5ccd06337 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -154,6 +154,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, htons(oldlen), htons(datalen), 1); } +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range) { @@ -169,6 +170,7 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], return 0; } +#endif static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { .l3proto = NFPROTO_IPV4, @@ -177,7 +179,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { .manip_pkt = nf_nat_ipv4_manip_pkt, .csum_update = nf_nat_ipv4_csum_update, .csum_recalc = nf_nat_ipv4_csum_recalc, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, +#endif #ifdef CONFIG_XFRM .decode_session = nf_nat_ipv4_decode_session, #endif diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 690d890111bb..9414923f1e15 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -124,7 +124,7 @@ static const struct nf_nat_l4proto gre = { .manip_pkt = gre_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = gre_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index eb303471bcf6..4557b4ab8342 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -77,7 +77,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmp = { .manip_pkt = icmp_manip_pkt, .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index b5b256d45e67..3964157d826c 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -48,15 +48,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); - nat = nfct_nat(ct); - if (nat == NULL) { - /* Conntrack module was loaded late, can't add extension. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) - return NF_ACCEPT; - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8210964a9f19..a3c59a077a5f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -236,15 +236,15 @@ exit: static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, kgid_t *high) { - kgid_t *data = net->ipv4.sysctl_ping_group_range; + kgid_t *data = net->ipv4.ping_group_range.range; unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } @@ -911,7 +911,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sin6->sin6_flowinfo = ip6_flowinfo(ip6); sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - IP6CB(skb)->iif); + inet6_iif(skb)); *addr_len = sizeof(*sin6); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ad737fad6d8b..8e3eb39f84e7 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -52,6 +52,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; + unsigned int frag_mem; int orphans, sockets; local_bh_disable(); @@ -71,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); - seq_printf(seq, "FRAG: inuse %d memory %d\n", - ip_frag_nqueues(net), ip_frag_mem(net)); + frag_mem = ip_frag_mem(net); + seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem); return 0; } @@ -345,15 +346,15 @@ static void icmp_put(struct seq_file *seq) for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), - snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); @@ -379,7 +380,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %llu", - snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp_fold_field64(net->mib.ip_statistics, snmp4_ipstats_list[i].entry, offsetof(struct ipstats_mib, syncp))); @@ -395,11 +396,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", - snmp_fold_field((void __percpu **)net->mib.tcp_statistics, + snmp_fold_field(net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); else seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.tcp_statistics, + snmp_fold_field(net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); } @@ -410,7 +411,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.udp_statistics, + snmp_fold_field(net->mib.udp_statistics, snmp4_udp_list[i].entry)); /* the UDP and UDP-Lite MIBs are the same */ @@ -421,7 +422,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.udplite_statistics, + snmp_fold_field(net->mib.udplite_statistics, snmp4_udp_list[i].entry)); seq_putc(seq, '\n'); @@ -458,7 +459,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nTcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.net_statistics, + snmp_fold_field(net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); @@ -468,7 +469,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) seq_printf(seq, " %llu", - snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp_fold_field64(net->mib.ip_statistics, snmp4_ipextstats_list[i].entry, offsetof(struct ipstats_mib, syncp))); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a9dbe58bdfe7..739db3100c23 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -58,6 +58,7 @@ #include <linux/in_route.h> #include <linux/route.h> #include <linux/skbuff.h> +#include <linux/igmp.h> #include <net/net_namespace.h> #include <net/dst.h> #include <net/sock.h> @@ -174,7 +175,9 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) while (sk) { delivered = 1; - if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { + if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && + ip_mc_sf_allow(sk, iph->daddr, iph->saddr, + skb->dev->ifindex)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ @@ -365,6 +368,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + skb->transport_header = skb->network_header; err = -EFAULT; if (memcpy_fromiovecend((void *)iph, from, 0, length)) @@ -389,7 +394,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } @@ -606,6 +611,8 @@ back_from_confirm: &rt, msg->msg_flags); else { + sock_tx_timestamp(sk, &ipc.tx_flags); + if (!ipc.addr) ipc.addr = fl4.daddr; lock_sock(sk); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index db1e0da871f4..190199851c9a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -89,6 +89,7 @@ #include <linux/rcupdate.h> #include <linux/times.h> #include <linux/slab.h> +#include <linux/jhash.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/protocol.h> @@ -456,39 +457,45 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } -/* - * Peer allocation may fail only in serious out-of-memory conditions. However - * we still can generate some output. - * Random ID selection looks a bit dangerous because we have no chances to - * select ID being unique in a reasonable period of time. - * But broken packet identifier may be better than no packet at all. +#define IP_IDENTS_SZ 2048u +struct ip_ident_bucket { + atomic_t id; + u32 stamp32; +}; + +static struct ip_ident_bucket *ip_idents __read_mostly; + +/* In order to protect privacy, we add a perturbation to identifiers + * if one generator is seldom used. This makes hard for an attacker + * to infer how many packets were sent between two points in time. */ -static void ip_select_fb_ident(struct iphdr *iph) +u32 ip_idents_reserve(u32 hash, int segs) { - static DEFINE_SPINLOCK(ip_fb_id_lock); - static u32 ip_fallback_id; - u32 salt; + struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ; + u32 old = ACCESS_ONCE(bucket->stamp32); + u32 now = (u32)jiffies; + u32 delta = 0; - spin_lock_bh(&ip_fb_id_lock); - salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); - iph->id = htons(salt & 0xFFFF); - ip_fallback_id = salt; - spin_unlock_bh(&ip_fb_id_lock); + if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) + delta = prandom_u32_max(now - old); + + return atomic_add_return(segs + delta, &bucket->id) - segs; } +EXPORT_SYMBOL(ip_idents_reserve); -void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) +void __ip_select_ident(struct iphdr *iph, int segs) { - struct net *net = dev_net(dst->dev); - struct inet_peer *peer; + static u32 ip_idents_hashrnd __read_mostly; + u32 hash, id; - peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); - if (peer) { - iph->id = htons(inet_getid(peer, more)); - inet_putpeer(peer); - return; - } + net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); - ip_select_fb_ident(iph); + hash = jhash_3words((__force u32)iph->daddr, + (__force u32)iph->saddr, + iph->protocol, + ip_idents_hashrnd); + id = ip_idents_reserve(hash, segs); + iph->id = htons(id); } EXPORT_SYMBOL(__ip_select_ident); @@ -993,6 +1000,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, struct flowi4 fl4; struct rtable *rt; + if (!mark) + mark = IP4_REPLY_MARK(net, skb->mark); + __build_flow_key(&fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); @@ -1010,6 +1020,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + + if (!fl4.flowi4_mark) + fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); + rt = __ip_route_output_key(sock_net(sk), &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); @@ -1022,7 +1036,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - struct dst_entry *dst; + struct dst_entry *odst = NULL; bool new = false; bh_lock_sock(sk); @@ -1030,16 +1044,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) if (!ip_sk_accept_pmtu(sk)) goto out; - rt = (struct rtable *) __sk_dst_get(sk); + odst = sk_dst_get(sk); - if (sock_owned_by_user(sk) || !rt) { + if (sock_owned_by_user(sk) || !odst) { __ipv4_sk_update_pmtu(skb, sk, mtu); goto out; } __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - if (!__sk_dst_check(sk, 0)) { + rt = (struct rtable *)odst; + if (odst->obsolete && odst->ops->check(odst, 0) == NULL) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; @@ -1049,8 +1064,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); - dst = dst_check(&rt->dst, 0); - if (!dst) { + if (!dst_check(&rt->dst, 0)) { if (new) dst_release(&rt->dst); @@ -1062,10 +1076,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) } if (new) - __sk_dst_set(sk, &rt->dst); + sk_dst_set(sk, &rt->dst); out: bh_unlock_sock(sk); + dst_release(odst); } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); @@ -1519,7 +1534,7 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *out_dev; unsigned int flags = 0; bool do_cache; - u32 itag; + u32 itag = 0; /* get a working reference to the output device */ out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); @@ -2704,6 +2719,12 @@ int __init ip_rt_init(void) { int rc = 0; + ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); + if (!ip_idents) + panic("IP: failed to allocate ip_idents\n"); + + prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f2ed13c2125f..c0c75688896e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -170,7 +170,8 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); -__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb, + __u16 *mssp) { const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); @@ -303,6 +304,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->ir_rmt_port = th->source; ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_mark = inet_request_mark(sk, skb); ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -339,7 +341,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 44eba052b43d..79a007c52558 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = range[0]; - net->ipv4.sysctl_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_seqlock(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = range[0]; + net->ipv4.ip_local_ports.range[1] = range[1]; + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos) { struct net *net = - container_of(table->data, struct net, ipv4.sysctl_local_ports.range); + container_of(table->data, struct net, ipv4.ip_local_ports.range); int ret; int range[2]; struct ctl_table tmp = { @@ -87,14 +87,14 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } /* Update system visible IP port range */ @@ -102,11 +102,11 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); - write_seqlock(&net->ipv4.sysctl_local_ports.lock); + container_of(table->data, struct net, ipv4.ping_group_range.range); + write_seqlock(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "ip_local_reserved_ports", - .data = NULL, /* initialized in sysctl_ipv4_init */ - .maxlen = 65536, - .mode = 0644, - .proc_handler = proc_do_large_bitmap, - }, - { .procname = "igmp_max_memberships", .data = &sysctl_igmp_max_memberships, .maxlen = sizeof(int), @@ -805,7 +798,7 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ping_group_range", - .data = &init_net.ipv4.sysctl_ping_group_range, + .data = &init_net.ipv4.ping_group_range.range, .maxlen = sizeof(gid_t)*2, .mode = 0644, .proc_handler = ipv4_ping_group_range, @@ -819,12 +812,19 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ip_local_port_range", - .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), - .data = &init_net.ipv4.sysctl_local_ports.range, + .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), + .data = &init_net.ipv4.ip_local_ports.range, .mode = 0644, .proc_handler = ipv4_local_port_range, }, { + .procname = "ip_local_reserved_ports", + .data = &init_net.ipv4.sysctl_local_reserved_ports, + .maxlen = 65536, + .mode = 0644, + .proc_handler = proc_do_large_bitmap, + }, + { .procname = "ip_no_pmtu_disc", .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, .maxlen = sizeof(int), @@ -838,6 +838,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv4.sysctl_fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_fwmark_accept", + .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; @@ -858,26 +872,18 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table[i].data += (void *)net - (void *)&init_net; } - /* - * Sane defaults - nobody may create ping sockets. - * Boot scripts should set this to distro-specific group. - */ - net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); - net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); - - /* - * Set defaults for local port range - */ - seqlock_init(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = 32768; - net->ipv4.sysctl_local_ports.range[1] = 61000; - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; + net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); + if (!net->ipv4.sysctl_local_reserved_ports) + goto err_ports; + return 0; +err_ports: + unregister_net_sysctl_table(net->ipv4.ipv4_hdr); err_reg: if (!net_eq(net, &init_net)) kfree(table); @@ -889,6 +895,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) { struct ctl_table *table; + kfree(net->ipv4.sysctl_local_reserved_ports); table = net->ipv4.ipv4_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.ipv4_hdr); kfree(table); @@ -902,16 +909,6 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { static __init int sysctl_ipv4_init(void) { struct ctl_table_header *hdr; - struct ctl_table *i; - - for (i = ipv4_table; i->procname; i++) { - if (strcmp(i->procname, "ip_local_reserved_ports") == 0) { - i->data = sysctl_local_reserved_ports; - break; - } - } - if (!i->procname) - return -EINVAL; hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); if (hdr == NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bd6d52eeffb..744af67a5989 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -426,6 +426,15 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); +void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + sock_tx_timestamp(sk, &shinfo->tx_flags); + if (shinfo->tx_flags & SKBTX_ANY_SW_TSTAMP) + shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; +} + /* * Wait for a TCP event. * @@ -523,7 +532,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err) + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR; return mask; @@ -959,8 +968,10 @@ new_segment: copied += copy; offset += copy; - if (!(size -= copy)) + if (!(size -= copy)) { + tcp_tx_timestamp(sk, skb); goto out; + } if (skb->len < size_goal || (flags & MSG_OOB)) continue; @@ -1108,7 +1119,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { copied = tcp_send_rcvq(sk, msg, size); - goto out; + goto out_nopush; } err = -EINVAL; @@ -1252,8 +1263,10 @@ new_segment: from += copy; copied += copy; - if ((seglen -= copy) == 0 && iovlen == 0) + if ((seglen -= copy) == 0 && iovlen == 0) { + tcp_tx_timestamp(sk, skb); goto out; + } if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) continue; @@ -1282,6 +1295,7 @@ wait_for_memory: out: if (copied) tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); +out_nopush: release_sock(sk); return copied + copied_syn; @@ -1616,6 +1630,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sk_buff *skb; u32 urg_hole = 0; + if (unlikely(flags & MSG_ERRQUEUE)) + return ip_recv_error(sk, msg, len, addr_len); + if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) sk_busy_loop(sk, nonblock); @@ -2916,6 +2933,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + + case TCP_FASTOPEN: + if (icsk->icsk_accept_queue.fastopenq != NULL) + val = icsk->icsk_accept_queue.fastopenq->max_qlen; + else + val = 0; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp + tp->tsoffset; break; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 821846fb0a7e..d5de69bc04f5 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 2b9464c93b88..7b09d8b49fa5 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return err; } -/* RFC2861 Check whether we are limited by application or congestion window - * This is the inverse of cwnd check in tcp_tso_should_defer - */ -bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) -{ - const struct tcp_sock *tp = tcp_sk(sk); - u32 left; - - if (in_flight >= tp->snd_cwnd) - return true; - - left = tp->snd_cwnd - in_flight; - if (sk_can_gso(sk) && - left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left < tp->xmit_size_goal_segs) - return true; - return left <= tcp_max_tso_deferred_mss(tp); -} -EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); - /* Slow start is used when congestion window is no greater than the slow start * threshold. We base on RFC2581 and also handle stretch ACKs properly. * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but @@ -337,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* In "safe" area, increase. */ diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 8bf224516ba2..a9bd8a4828a9 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) { @@ -409,7 +408,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; ratio += cnt; - ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); + ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); } /* Some calls are for duplicates without timetamps */ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f195d9316e55..9771563ab564 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -72,25 +72,224 @@ error: kfree(ctx); return err; } -/* Computes the fastopen cookie for the IP path. - * The path is a 128 bits long (pad with zeros for IPv4). - * - * The caller must check foc->len to determine if a valid cookie - * has been generated successfully. -*/ -void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, - struct tcp_fastopen_cookie *foc) +static bool __tcp_fastopen_cookie_gen(const void *path, + struct tcp_fastopen_cookie *foc) { - __be32 path[4] = { src, dst, 0, 0 }; struct tcp_fastopen_context *ctx; + bool ok = false; tcp_fastopen_init_key_once(true); rcu_read_lock(); ctx = rcu_dereference(tcp_fastopen_ctx); if (ctx) { - crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path); + crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); foc->len = TCP_FASTOPEN_COOKIE_SIZE; + ok = true; } rcu_read_unlock(); + return ok; +} + +/* Generate the fastopen cookie by doing aes128 encryption on both + * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6 + * addresses. For the longer IPv6 addresses use CBC-MAC. + * + * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. + */ +static bool tcp_fastopen_cookie_gen(struct request_sock *req, + struct sk_buff *syn, + struct tcp_fastopen_cookie *foc) +{ + if (req->rsk_ops->family == AF_INET) { + const struct iphdr *iph = ip_hdr(syn); + + __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; + return __tcp_fastopen_cookie_gen(path, foc); + } + +#if IS_ENABLED(CONFIG_IPV6) + if (req->rsk_ops->family == AF_INET6) { + const struct ipv6hdr *ip6h = ipv6_hdr(syn); + struct tcp_fastopen_cookie tmp; + + if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { + struct in6_addr *buf = (struct in6_addr *) tmp.val; + int i = 4; + + for (i = 0; i < 4; i++) + buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; + return __tcp_fastopen_cookie_gen(buf, foc); + } + } +#endif + return false; +} + +static bool tcp_fastopen_create_child(struct sock *sk, + struct sk_buff *skb, + struct dst_entry *dst, + struct request_sock *req) +{ + struct tcp_sock *tp; + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + struct sock *child; + + req->num_retrans = 0; + req->num_timeout = 0; + req->sk = NULL; + + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + if (child == NULL) + return false; + + spin_lock(&queue->fastopenq->lock); + queue->fastopenq->qlen++; + spin_unlock(&queue->fastopenq->lock); + + /* Initialize the child socket. Have to fix some values to take + * into account the child is a Fast Open socket and is created + * only out of the bits carried in the SYN packet. + */ + tp = tcp_sk(child); + + tp->fastopen_rsk = req; + /* Do a hold on the listner sk so that if the listener is being + * closed, the child that has been accepted can live on and still + * access listen_lock. + */ + sock_hold(sk); + tcp_rsk(req)->listener = sk; + + /* RFC1323: The window in SYN & SYN/ACK segments is never + * scaled. So correct it appropriately. + */ + tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + + /* Activate the retrans timer so that SYNACK can be retransmitted. + * The request socket is not added to the SYN table of the parent + * because it's been added to the accept queue directly. + */ + inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, + TCP_TIMEOUT_INIT, TCP_RTO_MAX); + + /* Add the child socket directly into the accept queue */ + inet_csk_reqsk_queue_add(sk, req, child); + + /* Now finish processing the fastopen child socket. */ + inet_csk(child)->icsk_af_ops->rebuild_header(child); + tcp_init_congestion_control(child); + tcp_mtup_init(child); + tcp_init_metrics(child); + tcp_init_buffer_space(child); + + /* Queue the data carried in the SYN packet. We need to first + * bump skb's refcnt because the caller will attempt to free it. + * + * XXX (TFO) - we honor a zero-payload TFO request for now, + * (any reason not to?) but no need to queue the skb since + * there is no data. How about SYN+FIN? + */ + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) { + skb = skb_get(skb); + skb_dst_drop(skb); + __skb_pull(skb, tcp_hdr(skb)->doff * 4); + skb_set_owner_r(skb, child); + __skb_queue_tail(&child->sk_receive_queue, skb); + tp->syn_data_acked = 1; + } + tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + sk->sk_data_ready(sk); + bh_unlock_sock(child); + sock_put(child); + WARN_ON(req->sk == NULL); + return true; +} +EXPORT_SYMBOL(tcp_fastopen_create_child); + +static bool tcp_fastopen_queue_check(struct sock *sk) +{ + struct fastopen_queue *fastopenq; + + /* Make sure the listener has enabled fastopen, and we don't + * exceed the max # of pending TFO requests allowed before trying + * to validating the cookie in order to avoid burning CPU cycles + * unnecessarily. + * + * XXX (TFO) - The implication of checking the max_qlen before + * processing a cookie request is that clients can't differentiate + * between qlen overflow causing Fast Open to be disabled + * temporarily vs a server not supporting Fast Open at all. + */ + fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; + if (fastopenq == NULL || fastopenq->max_qlen == 0) + return false; + + if (fastopenq->qlen >= fastopenq->max_qlen) { + struct request_sock *req1; + spin_lock(&fastopenq->lock); + req1 = fastopenq->rskq_rst_head; + if ((req1 == NULL) || time_after(req1->expires, jiffies)) { + spin_unlock(&fastopenq->lock); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); + return false; + } + fastopenq->rskq_rst_head = req1->dl_next; + fastopenq->qlen--; + spin_unlock(&fastopenq->lock); + reqsk_free(req1); + } + return true; +} + +/* Returns true if we should perform Fast Open on the SYN. The cookie (foc) + * may be updated and return the client in the SYN-ACK later. E.g., Fast Open + * cookie request (foc->len == 0). + */ +bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct dst_entry *dst) +{ + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + + if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + (syn_data || foc->len >= 0) && + tcp_fastopen_queue_check(sk))) { + foc->len = -1; + return false; + } + + if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) + goto fastopen; + + if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) && + foc->len == TCP_FASTOPEN_COOKIE_SIZE && + foc->len == valid_foc.len && + !memcmp(foc->val, valid_foc.val, foc->len)) { + /* Cookie is valid. Create a (full) child socket to accept + * the data in SYN before returning a SYN-ACK to ack the + * data. If we fail to create the socket, fall back and + * ack the ISN only but includes the same cookie. + * + * Note: Data-less SYN with valid cookie is allowed to send + * data in SYN_RECV state. + */ +fastopen: + if (tcp_fastopen_create_child(sk, skb, dst, req)) { + foc->len = -1; + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVE); + return true; + } + } + + NET_INC_STATS_BH(sock_net(sk), foc->len ? + LINUX_MIB_TCPFASTOPENPASSIVEFAIL : + LINUX_MIB_TCPFASTOPENCOOKIEREQD); + *foc = valid_foc; + return false; } +EXPORT_SYMBOL(tcp_try_fastopen); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b9e7bad77c0..1c4908280d92 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4a194acfd923..031361311a8b 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index a15a799bf768..d8f8f05a4951 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -87,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct hybla *ca = inet_csk_ca(sk); @@ -101,11 +100,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, ca->minrtt_us = tp->srtt_us; } - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (!ca->hybla_en) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 863d105e3015..5999b3972e64 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) /* * Increase window in response to successful acknowledgment. */ -static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); @@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, update_params(sk); /* RFC2861 only increase cwnd if fully utilized */ - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* In slow start */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d6b46eb2f94c..a3d47af01906 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -74,6 +74,7 @@ #include <linux/ipsec.h> #include <asm/unaligned.h> #include <net/netdma.h> +#include <linux/errqueue.h> int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1; @@ -1106,7 +1107,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } /* D-SACK for already forgotten data... Do dumb counting. */ - if (dup_sack && tp->undo_marker && tp->undo_retrans && + if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 && !after(end_seq_0, prior_snd_una) && after(end_seq_0, tp->undo_marker)) tp->undo_retrans--; @@ -1162,12 +1163,12 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, unsigned int new_len = (pkt_len / mss) * mss; if (!in_sack && new_len < pkt_len) { new_len += mss; - if (new_len > skb->len) + if (new_len >= skb->len) return 0; } pkt_len = new_len; } - err = tcp_fragment(sk, skb, pkt_len, mss); + err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; } @@ -1187,7 +1188,7 @@ static u8 tcp_sacktag_one(struct sock *sk, /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { - if (tp->undo_marker && tp->undo_retrans && + if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) @@ -1893,7 +1894,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp) tp->lost_out = 0; tp->undo_marker = 0; - tp->undo_retrans = 0; + tp->undo_retrans = -1; } void tcp_clear_retrans(struct tcp_sock *tp) @@ -1904,16 +1905,17 @@ void tcp_clear_retrans(struct tcp_sock *tp) tp->sacked_out = 0; } -/* Enter Loss state. If "how" is not zero, forget all SACK information +/* Enter Loss state. If we detect SACK reneging, forget all SACK information * and reset tags completely, otherwise preserve SACKs. If receiver * dropped its ofo queue, we will know this due to reneging detection. */ -void tcp_enter_loss(struct sock *sk, int how) +void tcp_enter_loss(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; bool new_recovery = false; + bool is_reneg; /* is receiver reneging on SACKs? */ /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || @@ -1934,7 +1936,11 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_reset_reno_sack(tp); tp->undo_marker = tp->snd_una; - if (how) { + + skb = tcp_write_queue_head(sk); + is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); + if (is_reneg) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; tp->fackets_out = 0; } @@ -1948,7 +1954,7 @@ void tcp_enter_loss(struct sock *sk, int how) tp->undo_marker = 0; TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); @@ -1981,19 +1987,21 @@ void tcp_enter_loss(struct sock *sk, int how) * remembered SACKs do not reflect real state of receiver i.e. * receiver _host_ is heavily congested (or buggy). * - * Do processing similar to RTO timeout. + * To avoid big spurious retransmission bursts due to transient SACK + * scoreboard oddities that look like reneging, we give the receiver a + * little time (max(RTT/2, 10ms)) to send us some more ACKs that will + * restore sanity to the SACK scoreboard. If the apparent reneging + * persists until this RTO then we'll clear the SACK scoreboard. */ static bool tcp_check_sack_reneging(struct sock *sk, int flag) { if (flag & FLAG_SACK_RENEGING) { - struct inet_connection_sock *icsk = inet_csk(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); + struct tcp_sock *tp = tcp_sk(sk); + unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), + msecs_to_jiffies(10)); - tcp_enter_loss(sk, 1); - icsk->icsk_retransmits++; - tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - icsk->icsk_rto, TCP_RTO_MAX); + delay, TCP_RTO_MAX); return true; } return false; @@ -2241,7 +2249,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) break; mss = skb_shinfo(skb)->gso_size; - err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss); + err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, + mss, GFP_ATOMIC); if (err < 0) break; cnt = packets; @@ -2474,7 +2483,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) * losses and/or application stalls), do not perform any further cwnd * reductions, but instead slow start up to ssthresh. */ -static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) +static void tcp_init_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2484,8 +2493,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; tp->prr_out = 0; - if (set_ssthresh) - tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); TCP_ECN_queue_cwr(tp); } @@ -2527,14 +2535,14 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) } /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ -void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) +void tcp_enter_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; - tcp_init_cwnd_reduction(sk, set_ssthresh); + tcp_init_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_CWR); } } @@ -2563,7 +2571,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) tp->retrans_stamp = 0; if (flag & FLAG_ECE) - tcp_enter_cwr(sk, 1); + tcp_enter_cwr(sk); if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); @@ -2664,12 +2672,12 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) tp->prior_ssthresh = 0; tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out; + tp->undo_retrans = tp->retrans_out ? : -1; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) tp->prior_ssthresh = tcp_current_ssthresh(sk); - tcp_init_cwnd_reduction(sk, true); + tcp_init_cwnd_reduction(sk); } tcp_set_ca_state(sk, TCP_CA_Recovery); } @@ -2684,13 +2692,12 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) bool recovered = !before(tp->snd_una, tp->high_seq); if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ - if (flag & FLAG_ORIG_SACK_ACKED) { - /* Step 3.b. A timeout is spurious if not all data are - * lost, i.e., never-retransmitted data are (s)acked. - */ - tcp_try_undo_loss(sk, true); + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. + */ + if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED)) return; - } + if (after(tp->snd_nxt, tp->high_seq) && (flag & FLAG_DATA_SACKED || is_dupack)) { tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ @@ -2938,10 +2945,11 @@ static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); } -static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { const struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight); + + icsk->icsk_ca_ops->cong_avoid(sk, ack, acked); tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } @@ -3099,6 +3107,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->retrans_stamp = 0; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_ACK_TSTAMP) && + between(skb_shinfo(skb)->tskey, prior_snd_una, + tp->snd_una + 1)) + __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + if (!fully_acked) break; @@ -3345,7 +3358,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tp->tlp_high_seq = 0; /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ if (!(flag & FLAG_DSACKING_ACK)) { - tcp_init_cwnd_reduction(sk, true); + tcp_init_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); tcp_try_keep_open(sk); @@ -3364,7 +3377,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; @@ -3397,7 +3409,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= FLAG_SND_UNA_ADVANCED; prior_fackets = tp->fackets_out; - prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3452,7 +3463,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* Advance cwnd if state allows */ if (tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, acked, prior_in_flight); + tcp_cong_avoid(sk, ack, acked); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); @@ -4703,28 +4714,6 @@ static int tcp_prune_queue(struct sock *sk) return -1; } -/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. - * As additional protections, we do not touch cwnd in retransmission phases, - * and if application hit its sndbuf limit recently. - */ -void tcp_cwnd_application_limited(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && - sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - /* Limited by application or receiver window. */ - u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); - u32 win_used = max(tp->snd_cwnd_used, init_win); - if (win_used < tp->snd_cwnd) { - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; - } - tp->snd_cwnd_used = 0; - } - tp->snd_cwnd_stamp = tcp_time_stamp; -} - static bool tcp_should_expand_sndbuf(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -5900,3 +5889,153 @@ discard: return 0; } EXPORT_SYMBOL(tcp_rcv_state_process); + +static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + if (family == AF_INET) + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), + &ireq->ir_rmt_addr, port); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"), + &ireq->ir_v6_rmt_addr, port); +#endif +} + +int tcp_conn_request(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct sk_buff *skb) +{ + struct tcp_options_received tmp_opt; + struct request_sock *req; + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = NULL; + __u32 isn = TCP_SKB_CB(skb)->when; + bool want_cookie = false, fastopen; + struct flowi fl; + struct tcp_fastopen_cookie foc = { .len = -1 }; + int err; + + + /* TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if ((sysctl_tcp_syncookies == 2 || + inet_csk_reqsk_queue_is_full(sk)) && !isn) { + want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); + if (!want_cookie) + goto drop; + } + + + /* Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + goto drop; + } + + req = inet_reqsk_alloc(rsk_ops); + if (!req) + goto drop; + + tcp_rsk(req)->af_specific = af_ops; + + tcp_clear_options(&tmp_opt); + tmp_opt.mss_clamp = af_ops->mss_clamp; + tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); + + if (want_cookie && !tmp_opt.saw_tstamp) + tcp_clear_options(&tmp_opt); + + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + tcp_openreq_init(req, &tmp_opt, skb, sk); + + af_ops->init_req(req, sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + if (!want_cookie || tmp_opt.tstamp_ok) + TCP_ECN_create_request(req, skb, sock_net(sk)); + + if (want_cookie) { + isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; + } else if (!isn) { + /* VJ's idea. We save last timestamp seen + * from the destination in peer table, when entering + * state TIME-WAIT, and check against it before + * accepting new connection request. + * + * If "isn" is not zero, this request hit alive + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. + */ + if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { + bool strict; + + dst = af_ops->route_req(sk, &fl, req, &strict); + if (dst && strict && + !tcp_peer_is_proven(req, dst, true)) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + goto drop_and_release; + } + } + /* Kill the following clause, if you dislike this way. */ + else if (!sysctl_tcp_syncookies && + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (sysctl_max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst, false)) { + /* Without syncookies last quarter of + * backlog is filled with destinations, + * proven to be alive. + * It means that we continue to communicate + * to destinations, already remembered + * to the moment of synflood. + */ + pr_drop_req(req, ntohs(tcp_hdr(skb)->source), + rsk_ops->family); + goto drop_and_release; + } + + isn = af_ops->init_seq(skb); + } + if (!dst) { + dst = af_ops->route_req(sk, &fl, req, NULL); + if (!dst) + goto drop_and_free; + } + + tcp_rsk(req)->snt_isn = isn; + tcp_openreq_init_rwin(req, sk, dst); + fastopen = !want_cookie && + tcp_try_fastopen(sk, skb, req, &foc, dst); + err = af_ops->send_synack(sk, dst, &fl, req, + skb_get_queue_mapping(skb), &foc); + if (!fastopen) { + if (err || want_cookie) + goto drop_and_free; + + tcp_rsk(req)->listener = NULL; + af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + } + + return 0; + +drop_and_release: + dst_release(dst); +drop_and_free: + reqsk_free(req); +drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + return 0; +} +EXPORT_SYMBOL(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 438f3b95143d..992a1f926009 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -99,7 +99,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) +static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, @@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; + inet_set_txhash(sk); + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; @@ -336,17 +338,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; struct sk_buff *skb; - struct request_sock *req; - __u32 seq; + struct request_sock *fastopen; + __u32 seq, snd_una; __u32 remaining; int err; struct net *net = dev_net(icmp_skb->dev); - if (icmp_skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); - return; - } - sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, iph->saddr, th->source, inet_iif(icmp_skb)); if (!sk) { @@ -378,12 +375,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) icsk = inet_csk(sk); tp = tcp_sk(sk); - req = tp->fastopen_rsk; seq = ntohl(th->seq); + /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ + fastopen = tp->fastopen_rsk; + snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt) && - (req == NULL || seq != tcp_rsk(req)->snt_isn)) { - /* For a Fast Open socket, allow seq to be snt_isn. */ + !between(seq, snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -426,11 +423,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) break; if (seq != tp->snd_una || !icsk->icsk_retransmits || - !icsk->icsk_backoff) + !icsk->icsk_backoff || fastopen) break; - /* XXX (TFO) - revisit the following logic for TFO */ - if (sock_owned_by_user(sk)) break; @@ -462,14 +457,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; } - /* XXX (TFO) - if it's a TFO socket and has been accepted, rather - * than following the TCP_SYN_RECV case and closing the socket, - * we ignore the ICMP error and keep trying like a fully established - * socket. Is this the right thing to do? - */ - if (req && req->sk == NULL) - goto out; - switch (sk->sk_state) { struct request_sock *req, **prev; case TCP_LISTEN: @@ -502,10 +489,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen. - It can f.e. if SYNs crossed, - or Fast Open. - */ + case TCP_SYN_RECV: + /* Only in fast or simultaneous open. If a fast open socket is + * is already accepted it is treated as a connected one below. + */ + if (fastopen && fastopen->sk == NULL) + break; + if (!sock_owned_by_user(sk)) { sk->sk_err = err; @@ -821,8 +811,10 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * socket. */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct flowi *fl, struct request_sock *req, - u16 queue_mapping) + u16 queue_mapping, + struct tcp_fastopen_cookie *foc) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -833,7 +825,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, NULL); + skb = tcp_make_synack(sk, dst, req, foc); if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -843,24 +835,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); - if (!tcp_rsk(req)->snt_synack && !err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; } return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) -{ - int res = tcp_v4_send_synack(sk, NULL, req, 0); - - if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - } - return res; -} - /* * IPv4 request_sock destructor. */ @@ -1070,7 +1049,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (sin->sin_family != AF_INET) return -EINVAL; - if (!cmd.tcpm_key || !cmd.tcpm_keylen) + if (!cmd.tcpm_keylen) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, AF_INET); @@ -1243,371 +1222,68 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) #endif +static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(skb); +} + +static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict) +{ + struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); + + if (strict) { + if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) + *strict = true; + else + *strict = false; + } + + return dst; +} + struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { + .mss_clamp = TCP_MSS_DEFAULT, +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_reqsk_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, -}; #endif - -static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct tcp_fastopen_cookie *valid_foc) -{ - bool skip_cookie = false; - struct fastopen_queue *fastopenq; - - if (likely(!fastopen_cookie_present(foc))) { - /* See include/net/tcp.h for the meaning of these knobs */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) || - ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) && - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1))) - skip_cookie = true; /* no cookie to validate */ - else - return false; - } - fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; - /* A FO option is present; bump the counter. */ - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); - - /* Make sure the listener has enabled fastopen, and we don't - * exceed the max # of pending TFO requests allowed before trying - * to validating the cookie in order to avoid burning CPU cycles - * unnecessarily. - * - * XXX (TFO) - The implication of checking the max_qlen before - * processing a cookie request is that clients can't differentiate - * between qlen overflow causing Fast Open to be disabled - * temporarily vs a server not supporting Fast Open at all. - */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 || - fastopenq == NULL || fastopenq->max_qlen == 0) - return false; - - if (fastopenq->qlen >= fastopenq->max_qlen) { - struct request_sock *req1; - spin_lock(&fastopenq->lock); - req1 = fastopenq->rskq_rst_head; - if ((req1 == NULL) || time_after(req1->expires, jiffies)) { - spin_unlock(&fastopenq->lock); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENLISTENOVERFLOW); - /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/ - foc->len = -1; - return false; - } - fastopenq->rskq_rst_head = req1->dl_next; - fastopenq->qlen--; - spin_unlock(&fastopenq->lock); - reqsk_free(req1); - } - if (skip_cookie) { - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - return true; - } - - if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { - if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || - memcmp(&foc->val[0], &valid_foc->val[0], - TCP_FASTOPEN_COOKIE_SIZE) != 0) - return false; - valid_foc->len = -1; - } - /* Acknowledge the data received from the peer. */ - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - return true; - } else if (foc->len == 0) { /* Client requesting a cookie */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENCOOKIEREQD); - } else { - /* Client sent a cookie with wrong size. Treat it - * the same as invalid and return a valid one. - */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, valid_foc); - } - return false; -} - -static int tcp_v4_conn_req_fastopen(struct sock *sk, - struct sk_buff *skb, - struct sk_buff *skb_synack, - struct request_sock *req) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; - const struct inet_request_sock *ireq = inet_rsk(req); - struct sock *child; - int err; - - req->num_retrans = 0; - req->num_timeout = 0; - req->sk = NULL; - - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) { - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - kfree_skb(skb_synack); - return -1; - } - err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, ireq->opt); - err = net_xmit_eval(err); - if (!err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; - /* XXX (TFO) - is it ok to ignore error and continue? */ - - spin_lock(&queue->fastopenq->lock); - queue->fastopenq->qlen++; - spin_unlock(&queue->fastopenq->lock); - - /* Initialize the child socket. Have to fix some values to take - * into account the child is a Fast Open socket and is created - * only out of the bits carried in the SYN packet. - */ - tp = tcp_sk(child); - - tp->fastopen_rsk = req; - /* Do a hold on the listner sk so that if the listener is being - * closed, the child that has been accepted can live on and still - * access listen_lock. - */ - sock_hold(sk); - tcp_rsk(req)->listener = sk; - - /* RFC1323: The window in SYN & SYN/ACK segments is never - * scaled. So correct it appropriately. - */ - tp->snd_wnd = ntohs(tcp_hdr(skb)->window); - - /* Activate the retrans timer so that SYNACK can be retransmitted. - * The request socket is not added to the SYN table of the parent - * because it's been added to the accept queue directly. - */ - inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, - TCP_TIMEOUT_INIT, TCP_RTO_MAX); - - /* Add the child socket directly into the accept queue */ - inet_csk_reqsk_queue_add(sk, req, child); - - /* Now finish processing the fastopen child socket. */ - inet_csk(child)->icsk_af_ops->rebuild_header(child); - tcp_init_congestion_control(child); - tcp_mtup_init(child); - tcp_init_metrics(child); - tcp_init_buffer_space(child); - - /* Queue the data carried in the SYN packet. We need to first - * bump skb's refcnt because the caller will attempt to free it. - * - * XXX (TFO) - we honor a zero-payload TFO request for now. - * (Any reason not to?) - */ - if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) { - /* Don't queue the skb if there is no payload in SYN. - * XXX (TFO) - How about SYN+FIN? - */ - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - } else { - skb = skb_get(skb); - skb_dst_drop(skb); - __skb_pull(skb, tcp_hdr(skb)->doff * 4); - skb_set_owner_r(skb, child); - __skb_queue_tail(&child->sk_receive_queue, skb); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - tp->syn_data_acked = 1; - } - sk->sk_data_ready(sk); - bh_unlock_sock(child); - sock_put(child); - WARN_ON(req->sk == NULL); - return 0; -} + .init_req = tcp_v4_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v4_init_sequence, +#endif + .route_req = tcp_v4_route_req, + .init_seq = tcp_v4_init_sequence, + .send_synack = tcp_v4_send_synack, + .queue_hash_add = inet_csk_reqsk_queue_hash_add, +}; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct inet_request_sock *ireq; - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = NULL; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; - __u32 isn = TCP_SKB_CB(skb)->when; - bool want_cookie = false; - struct flowi4 fl4; - struct tcp_fastopen_cookie foc = { .len = -1 }; - struct tcp_fastopen_cookie valid_foc = { .len = -1 }; - struct sk_buff *skb_synack; - int do_fastopen; - /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); - if (!want_cookie) - goto drop; - } - - /* Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } - - req = inet_reqsk_alloc(&tcp_request_sock_ops); - if (!req) - goto drop; - -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; -#endif - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = TCP_MSS_DEFAULT; - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); - - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); - - ireq = inet_rsk(req); - ireq->ir_loc_addr = daddr; - ireq->ir_rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(skb); + return tcp_conn_request(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - if (want_cookie) { - isn = cookie_v4_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - } else if (!isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && - fl4.daddr == saddr) { - if (!tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), - &saddr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } - - isn = tcp_v4_init_sequence(skb); - } - tcp_rsk(req)->snt_isn = isn; - - if (dst == NULL) { - dst = inet_csk_route_req(sk, &fl4, req); - if (dst == NULL) - goto drop_and_free; - } - do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc); - - /* We don't call tcp_v4_send_synack() directly because we need - * to make sure a child socket can be created successfully before - * sending back synack! - * - * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack() - * (or better yet, call tcp_send_synack() in the child context - * directly, but will have to fix bunch of other code first) - * after syn_recv_sock() except one will need to first fix the - * latter to remove its dependency on the current implementation - * of tcp_v4_send_synack()->tcp_select_initial_window(). - */ - skb_synack = tcp_make_synack(sk, dst, req, - fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); - - if (skb_synack) { - __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr); - skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); - } else - goto drop_and_free; - - if (likely(!do_fastopen)) { - int err; - err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, - ireq->ir_rmt_addr, ireq->opt); - err = net_xmit_eval(err); - if (err || want_cookie) - goto drop_and_free; - - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_rsk(req)->listener = NULL; - /* Add the request_sock to the SYN table */ - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - if (fastopen_cookie_present(&foc) && foc.len != 0) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) - goto drop_and_free; - - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; @@ -1655,6 +1331,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; + inet_set_txhash(newsk); if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; @@ -1744,28 +1421,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v4_check(skb->len, iph->saddr, - iph->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, IPPROTO_TCP, 0); - - if (skb->len <= 76) { - return __skb_checksum_complete(skb); - } - return 0; -} - - /* The socket must have it's spinlock held when we get * here. * @@ -1960,7 +1615,8 @@ int tcp_v4_rcv(struct sk_buff *skb) * Packet length and doff are validated by header prediction, * provided case of th->doff==0 is eliminated. * So, we defer the checks. */ - if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) + + if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) goto csum_error; th = tcp_hdr(skb); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index c9aecae31327..1e70fa8fa793 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk) * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. */ -static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } /** diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index d4f015ad6c84..3af522622fad 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -102,17 +102,19 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) return 0; } -static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - char *buffer) +static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); unsigned long long val; int ret = 0; - switch (cft->private) { + buf = strstrip(buf); + + switch (of_cft(of)->private) { case RES_LIMIT: /* see memcontrol.c */ - ret = res_counter_memparse_write_strategy(buffer, &val); + ret = res_counter_memparse_write_strategy(buf, &val); if (ret) break; ret = tcp_update_limit(memcg, val); @@ -121,7 +123,7 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, ret = -EINVAL; break; } - return ret; + return ret ?: nbytes; } static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) @@ -168,17 +170,18 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) return val; } -static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) +static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg; struct cg_proto *cg_proto; - memcg = mem_cgroup_from_css(css); + memcg = mem_cgroup_from_css(of_css(of)); cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) - return 0; + return nbytes; - switch (event) { + switch (of_cft(of)->private) { case RES_MAX_USAGE: res_counter_reset_max(&cg_proto->memory_allocated); break; @@ -187,13 +190,13 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) break; } - return 0; + return nbytes; } static struct cftype tcp_files[] = { { .name = "kmem.tcp.limit_in_bytes", - .write_string = tcp_cgroup_write, + .write = tcp_cgroup_write, .read_u64 = tcp_cgroup_read, .private = RES_LIMIT, }, @@ -205,13 +208,13 @@ static struct cftype tcp_files[] = { { .name = "kmem.tcp.failcnt", .private = RES_FAILCNT, - .trigger = tcp_cgroup_reset, + .write = tcp_cgroup_reset, .read_u64 = tcp_cgroup_read, }, { .name = "kmem.tcp.max_usage_in_bytes", .private = RES_MAX_USAGE, - .trigger = tcp_cgroup_reset, + .write = tcp_cgroup_reset, .read_u64 = tcp_cgroup_read, }, { } /* terminate */ @@ -219,7 +222,7 @@ static struct cftype tcp_files[] = { static int __init tcp_memcontrol_init(void) { - WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files)); + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files)); return 0; } __initcall(tcp_memcontrol_init); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index dcaf72f10216..0d54e59b9ea8 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1093,7 +1093,6 @@ static const struct genl_ops tcp_metrics_nl_ops[] = { .doit = tcp_metrics_nl_cmd_get, .dumpit = tcp_metrics_nl_dump, .policy = tcp_metrics_nl_policy, - .flags = GENL_ADMIN_PERM, }, { .cmd = TCP_METRICS_CMD_DEL, @@ -1159,10 +1158,7 @@ static void __net_exit tcp_net_metrics_exit(struct net *net) tm = next; } } - if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash)) - vfree(net->ipv4.tcp_metrics_hash); - else - kfree(net->ipv4.tcp_metrics_hash); + kvfree(net->ipv4.tcp_metrics_hash); } static __net_initdata struct pernet_operations tcp_net_metrics_ops = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 05c1b155251d..1649988bd1b6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -298,7 +298,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_flowlabel = np->flow_label >> 12; - tw->tw_ipv6only = np->ipv6only; + tw->tw_ipv6only = sk->sk_ipv6only; } #endif @@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +void tcp_openreq_init_rwin(struct request_sock *req, + struct sock *sk, struct dst_entry *dst) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_sock *tp = tcp_sk(sk); + __u8 rcv_wscale; + int mss = dst_metric_advmss(dst); + + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) + mss = tp->rx_opt.user_mss; + + /* Set this up on the first call only */ + req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); + + /* limit the window selection if the user enforce a smaller rx buffer */ + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) + req->window_clamp = tcp_full_space(sk); + + /* tcp_full_space because it is guaranteed to be the first packet */ + tcp_select_initial_window(tcp_full_space(sk), + mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), + &req->rcv_wnd, + &req->window_clamp, + ireq->wscale_ok, + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); + ireq->rcv_wscale = rcv_wscale; +} +EXPORT_SYMBOL(tcp_openreq_init_rwin); + static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, struct request_sock *req) { diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b92b81718ca4..f597119fc4e7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -14,6 +14,21 @@ #include <net/tcp.h> #include <net/protocol.h> +void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, unsigned int seq, + unsigned int mss) +{ + while (skb) { + if (ts_seq < (__u64) seq + mss) { + skb_shinfo(skb)->tx_flags = SKBTX_SW_TSTAMP; + skb_shinfo(skb)->tskey = ts_seq; + return; + } + + skb = skb->next; + seq += mss; + } +} + struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -57,10 +72,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; @@ -89,6 +106,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); + if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) + tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); @@ -97,9 +117,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->check = newcheck; if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = - csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); + th->check = gso_make_checksum(skb, ~th->check); seq += mss; if (copy_destructor) { @@ -133,8 +151,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); + th->check = gso_make_checksum(skb, ~th->check); out: return segs; } @@ -310,7 +327,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; return tcp_gro_complete(skb); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 025e25093984..8fcfc91964ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - if (foc != NULL) { + if (foc != NULL && foc->len >= 0) { u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { @@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, BUG_ON(!skb || !tcp_skb_pcount(skb)); if (clone_it) { - const struct sk_buff *fclone = skb + 1; - skb_mstamp_get(&skb->skb_mstamp); - if (unlikely(skb->fclone == SKB_FCLONE_ORIG && - fclone->fclone == SKB_FCLONE_CLONE)) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); - if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else @@ -923,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; skb->destructor = tcp_wfree; + skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ @@ -985,7 +979,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (likely(err <= 0)) return err; - tcp_enter_cwr(sk, 1); + tcp_enter_cwr(sk); return net_xmit_eval(err); } @@ -1081,7 +1075,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de * Remember, these are still headerless SKBs at this point. */ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, - unsigned int mss_now) + unsigned int mss_now, gfp_t gfp) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; @@ -1096,11 +1090,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_unclone(skb, GFP_ATOMIC)) + if (skb_unclone(skb, gfp)) return -ENOMEM; /* Get a new skb... force flag on. */ - buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC); + buff = sk_stream_alloc_skb(sk, nsize, gfp); if (buff == NULL) return -ENOMEM; /* We'll just try again later. */ @@ -1387,12 +1381,43 @@ unsigned int tcp_current_mss(struct sock *sk) return mss_now; } -/* Congestion window validation. (RFC2861) */ -static void tcp_cwnd_validate(struct sock *sk) +/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. + * As additional protections, we do not touch cwnd in retransmission phases, + * and if application hit its sndbuf limit recently. + */ +static void tcp_cwnd_application_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && + sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + /* Limited by application or receiver window. */ + u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); + u32 win_used = max(tp->snd_cwnd_used, init_win); + if (win_used < tp->snd_cwnd) { + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; + } + tp->snd_cwnd_used = 0; + } + tp->snd_cwnd_stamp = tcp_time_stamp; +} + +static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) { struct tcp_sock *tp = tcp_sk(sk); - if (tp->packets_out >= tp->snd_cwnd) { + /* Track the maximum number of outstanding packets in each + * window, and remember whether we were cwnd-limited then. + */ + if (!before(tp->snd_una, tp->max_packets_seq) || + tp->packets_out > tp->max_packets_out) { + tp->max_packets_out = tp->packets_out; + tp->max_packets_seq = tp->snd_nxt; + tp->is_cwnd_limited = is_cwnd_limited; + } + + if (tcp_is_cwnd_limited(sk)) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; tp->snd_cwnd_stamp = tcp_time_stamp; @@ -1601,7 +1626,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* All of a TSO frame must be composed of paged data. */ if (skb->len != skb->data_len) - return tcp_fragment(sk, skb, len, mss_now); + return tcp_fragment(sk, skb, len, mss_now, gfp); buff = sk_stream_alloc_skb(sk, 0, gfp); if (unlikely(buff == NULL)) @@ -1644,7 +1669,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, * * This algorithm is from John Heffner. */ -static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) +static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, + bool *is_cwnd_limited) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1708,6 +1734,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) if (!tp->tso_deferred) tp->tso_deferred = 1 | (jiffies << 1); + if (cong_win < send_win && cong_win < skb->len) + *is_cwnd_limited = true; + return true; send_now: @@ -1868,6 +1897,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unsigned int tso_segs, sent_pkts; int cwnd_quota; int result; + bool is_cwnd_limited = false; sent_pkts = 0; @@ -1892,6 +1922,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { + is_cwnd_limited = true; if (push_one == 2) /* Force out a loss probe pkt. */ cwnd_quota = 1; @@ -1908,7 +1939,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, nonagle : TCP_NAGLE_PUSH)))) break; } else { - if (!push_one && tcp_tso_should_defer(sk, skb)) + if (!push_one && + tcp_tso_should_defer(sk, skb, &is_cwnd_limited)) break; } @@ -1930,10 +1962,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, /* It is possible TX completion already happened * before we set TSQ_THROTTLED, so we must * test again the condition. - * We abuse smp_mb__after_clear_bit() because - * there is no smp_mb__after_set_bit() yet */ - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); if (atomic_read(&sk->sk_wmem_alloc) > limit) break; } @@ -1975,7 +2005,7 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk); - tcp_cwnd_validate(sk); + tcp_cwnd_validate(sk, is_cwnd_limited); return false; } return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); @@ -2039,6 +2069,25 @@ bool tcp_schedule_loss_probe(struct sock *sk) return true; } +/* Thanks to skb fast clones, we can detect if a prior transmit of + * a packet is still in a qdisc or driver queue. + * In this case, there is very little point doing a retransmit ! + * Note: This is called from BH context only. + */ +static bool skb_still_in_host_queue(const struct sock *sk, + const struct sk_buff *skb) +{ + const struct sk_buff *fclone = skb + 1; + + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && + fclone->fclone == SKB_FCLONE_CLONE)) { + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + return true; + } + return false; +} + /* When probe timeout (PTO) fires, send a new segment if one exists, else * retransmit the last segment. */ @@ -2064,12 +2113,16 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb)) goto rearm_timer; + if (skb_still_in_host_queue(sk, skb)) + goto rearm_timer; + pcount = tcp_skb_pcount(skb); if (WARN_ON(!pcount)) goto rearm_timer; if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { - if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) + if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss, + GFP_ATOMIC))) goto rearm_timer; skb = tcp_write_queue_tail(sk); } @@ -2077,9 +2130,7 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; - /* Probe with zero data doesn't trigger fast recovery. */ - if (skb->len > 0) - err = __tcp_retransmit_skb(sk, skb); + err = __tcp_retransmit_skb(sk, skb); /* Record snd_nxt for loss detection. */ if (likely(!err)) @@ -2385,6 +2436,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; + if (skb_still_in_host_queue(sk, skb)) + return -EBUSY; + if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) BUG(); @@ -2407,7 +2461,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -EAGAIN; if (skb->len > cur_mss) { - if (tcp_fragment(sk, skb, cur_mss, cur_mss)) + if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC)) return -ENOMEM; /* We'll try again later. */ } else { int oldpcount = tcp_skb_pcount(skb); @@ -2441,8 +2495,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } - if (likely(!err)) + if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + /* Update global TCP statistics. */ + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + tp->total_retrans++; + } return err; } @@ -2452,12 +2512,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int err = __tcp_retransmit_skb(sk, skb); if (err == 0) { - /* Update global TCP statistics. */ - TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - tp->total_retrans++; - #if FASTRETRANS_DEBUG > 0 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { net_dbg_ratelimited("retrans_out leaked\n"); @@ -2472,15 +2526,17 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (!tp->retrans_stamp) tp->retrans_stamp = TCP_SKB_CB(skb)->when; - tp->undo_retrans += tcp_skb_pcount(skb); - /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). */ TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; - } else { + } else if (err != -EBUSY) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } + + if (tp->undo_retrans < 0) + tp->undo_retrans = 0; + tp->undo_retrans += tcp_skb_pcount(skb); return err; } @@ -2756,27 +2812,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) mss = tp->rx_opt.user_mss; - if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ - __u8 rcv_wscale; - /* Set this up on the first call only */ - req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); - - /* limit the window selection if the user enforce a smaller rx buffer */ - if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && - (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) - req->window_clamp = tcp_full_space(sk); - - /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(tcp_full_space(sk), - mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), - &req->rcv_wnd, - &req->window_clamp, - ireq->wscale_ok, - &rcv_wscale, - dst_metric(dst, RTAX_INITRWND)); - ireq->rcv_wscale = rcv_wscale; - } - memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) @@ -3209,7 +3244,7 @@ int tcp_write_wakeup(struct sock *sk) skb->len > mss) { seg_size = min(seg_size, mss); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; - if (tcp_fragment(sk, skb, seg_size, mss)) + if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC)) return -1; } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(sk, skb, mss); @@ -3267,3 +3302,18 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } } + +int tcp_rtx_synack(struct sock *sk, struct request_sock *req) +{ + const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; + struct flowi fl; + int res; + + res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); + if (!res) { + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } + return res; +} +EXPORT_SYMBOL(tcp_rtx_synack); diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 0ac50836da4d..8250949b8853 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,12 +15,11 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 286227abed10..df90cd1ce37f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -391,7 +391,7 @@ void tcp_retransmit_timer(struct sock *sk) tcp_write_err(sk); goto out; } - tcp_enter_loss(sk, 0); + tcp_enter_loss(sk); tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); __sk_dst_reset(sk); goto out_reset_timer; @@ -422,7 +422,7 @@ void tcp_retransmit_timer(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), mib_idx); } - tcp_enter_loss(sk, 0); + tcp_enter_loss(sk); if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { /* Retransmission failed because of local congestion, diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 48539fff6357..b40ad897f945 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) return min(tp->snd_ssthresh, tp->snd_cwnd-1); } -static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } @@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } else { u32 rtt, diff; u64 target_cwnd; @@ -219,7 +218,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, * This is: * (actual rate in segments) * baseRTT */ - target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; + target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT; + do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. This quantity diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 1b8e28fcd7e1..8276977d2c85 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) tcp_veno_init(sk); } -static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); if (!veno->doing_veno_now) { - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); return; } /* limited by applications */ - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; /* We do the Veno calculations only if we got enough rtt samples */ @@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, /* We don't have enough rtt samples to do the Veno * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, acked, in_flight); + tcp_reno_cong_avoid(sk, ack, acked); } else { u64 target_cwnd; u32 rtt; @@ -145,7 +144,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, rtt = veno->minrtt; - target_cwnd = (tp->snd_cwnd * veno->basertt); + target_cwnd = (u64)tp->snd_cwnd * veno->basertt; target_cwnd <<= V_PARAM_SHIFT; do_div(target_cwnd, rtt); diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 5ede0e727945..599b79b8eac0 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); } -static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked, - u32 in_flight) +static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) + if (!tcp_is_cwnd_limited(sk)) return; if (tp->snd_cwnd <= tp->snd_ssthresh) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4468e1adc094..f57c0e4c2326 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -246,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, do { if (low <= snum && snum <= high && !test_bit(snum >> udptable->log, bitmap) && - !inet_is_reserved_local_port(snum)) + !inet_is_local_reserved_port(net, snum)) goto found; snum += rand; } while (snum != first); @@ -594,27 +594,6 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, return true; } -static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, - __be16 loc_port, __be32 loc_addr, - __be16 rmt_port, __be32 rmt_addr, - int dif) -{ - struct hlist_nulls_node *node; - struct sock *s = sk; - unsigned short hnum = ntohs(loc_port); - - sk_nulls_for_each_from(s, node) { - if (__udp_is_mcast_sock(net, s, - loc_port, loc_addr, - rmt_port, rmt_addr, - dif, hnum)) - goto found; - } - s = NULL; -found: - return s; -} - /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should @@ -727,13 +706,12 @@ EXPORT_SYMBOL(udp_flush_pending_frames); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) { struct udphdr *uh = udp_hdr(skb); - struct sk_buff *frags = skb_shinfo(skb)->frag_list; int offset = skb_transport_offset(skb); int len = skb->len - offset; int hlen = len; __wsum csum = 0; - if (!frags) { + if (!skb_has_frag_list(skb)) { /* * Only one fragment on the socket. */ @@ -742,15 +720,17 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); } else { + struct sk_buff *frags; + /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ - do { + skb_walk_frags(skb, frags) { csum = csum_add(csum, frags->csum); hlen -= frags->len; - } while ((frags = frags->next)); + } csum = skb_checksum(skb, offset, hlen, csum); skb->ip_summed = CHECKSUM_NONE; @@ -762,6 +742,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) } EXPORT_SYMBOL_GPL(udp4_hwcsum); +/* Function to set UDP checksum for an IPv4 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. + */ +void udp_set_csum(bool nocheck, struct sk_buff *skb, + __be32 saddr, __be32 daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v4_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} +EXPORT_SYMBOL(udp_set_csum); + static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) { struct sock *sk = skb->sk; @@ -785,7 +802,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; @@ -1495,6 +1512,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { int ret; + /* Verify checksum before giving to encap */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + ret = encap_rcv(sk, skb); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), @@ -1546,8 +1567,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto csum_error; - if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) + if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); goto drop; + } rc = 0; @@ -1595,6 +1619,8 @@ static void flush_stack(struct sock **stack, unsigned int count, if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) skb1 = NULL; + + sock_put(sk); } if (unlikely(skb1)) kfree_skb(skb1); @@ -1623,41 +1649,50 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udp_table *udptable) { struct sock *sk, *stack[256 / sizeof(struct sock *)]; - struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); - int dif; - unsigned int i, count = 0; + struct hlist_nulls_node *node; + unsigned short hnum = ntohs(uh->dest); + struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); + int dif = skb->dev->ifindex; + unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); + unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); + + if (use_hash2) { + hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & + udp_table.mask; + hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask; +start_lookup: + hslot = &udp_table.hash2[hash2]; + offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); + } spin_lock(&hslot->lock); - sk = sk_nulls_head(&hslot->head); - dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); - while (sk) { - stack[count++] = sk; - sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, - daddr, uh->source, saddr, dif); - if (unlikely(count == ARRAY_SIZE(stack))) { - if (!sk) - break; - flush_stack(stack, count, skb, ~0); - count = 0; + sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { + if (__udp_is_mcast_sock(net, sk, + uh->dest, daddr, + uh->source, saddr, + dif, hnum)) { + if (unlikely(count == ARRAY_SIZE(stack))) { + flush_stack(stack, count, skb, ~0); + count = 0; + } + stack[count++] = sk; + sock_hold(sk); } } - /* - * before releasing chain lock, we must take a reference on sockets - */ - for (i = 0; i < count; i++) - sock_hold(stack[i]); spin_unlock(&hslot->lock); + /* Also lookup *:port if we are using hash2 and haven't done so yet. */ + if (use_hash2 && hash2 != hash2_any) { + hash2 = hash2_any; + goto start_lookup; + } + /* * do the slow work with no lock held */ if (count) { flush_stack(stack, count, skb, count - 1); - - for (i = 0; i < count; i++) - sock_put(stack[i]); } else { kfree_skb(skb); } @@ -1672,7 +1707,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) { - const struct iphdr *iph; int err; UDP_SKB_CB(skb)->partial_cov = 0; @@ -1684,22 +1718,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } - iph = ip_hdr(skb); - if (uh->check == 0) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - if (!skb_csum_unnecessary(skb)) - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, proto, 0); - /* Probably, we should checksum udp header (it should be in cache - * in any case) and data in tiny packets (< rx copybreak). - */ - - return 0; + return skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); } /* @@ -1834,6 +1854,10 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); struct udp_hslot *hslot = &udp_table.hash[slot]; + /* Do not bother scanning a too big list */ + if (hslot->count > 10) + return NULL; + rcu_read_lock(); begin: count = 0; @@ -1886,7 +1910,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; - INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr) + INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); rcu_read_lock(); @@ -1979,7 +2003,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); - int val; + int val, valbool; int err = 0; int is_udplite = IS_UDPLITE(sk); @@ -1989,6 +2013,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; + valbool = val ? 1 : 0; + switch (optname) { case UDP_CORK: if (val != 0) { @@ -2018,6 +2044,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, } break; + case UDP_NO_CHECK6_TX: + up->no_check6_tx = valbool; + break; + + case UDP_NO_CHECK6_RX: + up->no_check6_rx = valbool; + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -2100,6 +2134,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->encap_type; break; + case UDP_NO_CHECK6_TX: + val = up->no_check6_tx; + break; + + case UDP_NO_CHECK6_RX: + val = up->no_check6_rx; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: @@ -2474,81 +2516,3 @@ void __init udp_init(void) sysctl_udp_rmem_min = SK_MEM_QUANTUM; sysctl_udp_wmem_min = SK_MEM_QUANTUM; } - -struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - u16 mac_offset = skb->mac_header; - int mac_len = skb->mac_len; - int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - __be16 protocol = skb->protocol; - netdev_features_t enc_features; - int outer_hlen; - - if (unlikely(!pskb_may_pull(skb, tnl_hlen))) - goto out; - - skb->encapsulation = 0; - __skb_pull(skb, tnl_hlen); - skb_reset_mac_header(skb); - skb_set_network_header(skb, skb_inner_network_offset(skb)); - skb->mac_len = skb_inner_network_offset(skb); - skb->protocol = htons(ETH_P_TEB); - - /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); - segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) { - skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, - mac_len); - goto out; - } - - outer_hlen = skb_tnl_header_len(skb); - skb = segs; - do { - struct udphdr *uh; - int udp_offset = outer_hlen - tnl_hlen; - - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - - skb->mac_len = mac_len; - - skb_push(skb, outer_hlen); - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); - skb_set_transport_header(skb, udp_offset); - uh = udp_hdr(skb); - uh->len = htons(skb->len - udp_offset); - - /* csum segment if tunnel sets skb with csum. */ - if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { - struct iphdr *iph = ip_hdr(skb); - - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - udp_offset, - IPPROTO_UDP, 0); - uh->check = csum_fold(skb_checksum(skb, udp_offset, - skb->len - udp_offset, 0)); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - } else if (protocol == htons(ETH_P_IPV6)) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 len = skb->len - udp_offset; - - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - len, IPPROTO_UDP, 0); - uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0)); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; - } - - skb->protocol = protocol; - } while ((skb = skb->next)); -out: - return segs; -} diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 88b4023ecfcf..59035bc3008d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -47,6 +47,82 @@ static int udp4_ufo_send_check(struct sk_buff *skb) return 0; } +struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; + int mac_len = skb->mac_len; + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + __be16 protocol = skb->protocol; + netdev_features_t enc_features; + int udp_offset, outer_hlen; + unsigned int oldlen; + bool need_csum; + + oldlen = (u16)~skb->len; + + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) + goto out; + + skb->encapsulation = 0; + __skb_pull(skb, tnl_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + skb->protocol = htons(ETH_P_TEB); + + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); + if (need_csum) + skb->encap_hdr_csum = 1; + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (IS_ERR_OR_NULL(segs)) { + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); + goto out; + } + + outer_hlen = skb_tnl_header_len(skb); + udp_offset = outer_hlen - tnl_hlen; + skb = segs; + do { + struct udphdr *uh; + int len; + + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + + skb->mac_len = mac_len; + + skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, udp_offset); + len = skb->len - udp_offset; + uh = udp_hdr(skb); + uh->len = htons(len); + + if (need_csum) { + __be32 delta = htonl(oldlen + len); + + uh->check = ~csum_fold((__force __wsum) + ((__force u32)uh->check + + (__force u32)delta)); + uh->check = gso_make_checksum(skb, ~uh->check); + + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } + + skb->protocol = protocol; + } while ((skb = skb->next)); +out: + return segs; +} + static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { @@ -56,7 +132,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, __wsum csum; if (skb->encapsulation && - skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { + (skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { segs = skb_udp_tunnel_segment(skb, features); goto out; } @@ -71,8 +148,10 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_IPIP | - SKB_GSO_GRE | SKB_GSO_MPLS) || + SKB_GSO_GRE | SKB_GSO_GRE_CSUM | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; @@ -197,6 +276,7 @@ unflush: } skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ + skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); pp = uo_priv->offload->callbacks.gro_receive(head, skb); out_unlock: diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c new file mode 100644 index 000000000000..61ec1a65207e --- /dev/null +++ b/net/ipv4/udp_tunnel.c @@ -0,0 +1,100 @@ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/socket.h> +#include <linux/udp.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <net/udp.h> +#include <net/udp_tunnel.h> +#include <net/net_namespace.h> + +int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp) +{ + int err = -EINVAL; + struct socket *sock = NULL; + +#if IS_ENABLED(CONFIG_IPV6) + if (cfg->family == AF_INET6) { + struct sockaddr_in6 udp6_addr; + + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + if (err < 0) + goto error; + + sk_change_net(sock->sk, net); + + udp6_addr.sin6_family = AF_INET6; + memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, + sizeof(udp6_addr.sin6_addr)); + udp6_addr.sin6_port = cfg->local_udp_port; + err = kernel_bind(sock, (struct sockaddr *)&udp6_addr, + sizeof(udp6_addr)); + if (err < 0) + goto error; + + if (cfg->peer_udp_port) { + udp6_addr.sin6_family = AF_INET6; + memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, + sizeof(udp6_addr.sin6_addr)); + udp6_addr.sin6_port = cfg->peer_udp_port; + err = kernel_connect(sock, + (struct sockaddr *)&udp6_addr, + sizeof(udp6_addr), 0); + } + if (err < 0) + goto error; + + udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums); + udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums); + } else +#endif + if (cfg->family == AF_INET) { + struct sockaddr_in udp_addr; + + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); + if (err < 0) + goto error; + + sk_change_net(sock->sk, net); + + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->local_ip; + udp_addr.sin_port = cfg->local_udp_port; + err = kernel_bind(sock, (struct sockaddr *)&udp_addr, + sizeof(udp_addr)); + if (err < 0) + goto error; + + if (cfg->peer_udp_port) { + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->peer_ip; + udp_addr.sin_port = cfg->peer_udp_port; + err = kernel_connect(sock, + (struct sockaddr *)&udp_addr, + sizeof(udp_addr), 0); + if (err < 0) + goto error; + } + + sock->sk->sk_no_check_tx = !cfg->use_udp_checksums; + } else { + return -EPFNOSUPPORT; + } + + + *sockp = sock; + + return 0; + +error: + if (sock) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); + } + *sockp = NULL; + return err; +} +EXPORT_SYMBOL(udp_sock_create); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 2c46acd4cc36..3b3efbda48e1 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -70,7 +70,6 @@ static struct inet_protosw udplite4_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplite_prot, .ops = &inet_dgram_ops, - .no_check = 0, /* must checksum (RFC 3828) */ .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 05f2b484954f..91771a7c802f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - ip_select_ident(skb, dst->child, NULL); top_iph->ttl = ip4_dst_hoplimit(dst->child); top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; + ip_select_ident(skb, NULL); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 40e701f2e1e0..d5f6bd9a210a 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) goto out; - if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) goto out; mtu = dst_mtu(skb_dst(skb)); @@ -62,10 +62,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED; - - skb->protocol = htons(ETH_P_IP); + IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; return x->outer_mode->output2(x, skb); } @@ -73,27 +70,34 @@ EXPORT_SYMBOL(xfrm4_prepare_output); int xfrm4_output_finish(struct sk_buff *skb) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + skb->protocol = htons(ETH_P_IP); + +#ifdef CONFIG_NETFILTER + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; +#endif + + return xfrm_output(skb); +} + +static int __xfrm4_output(struct sk_buff *skb) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + #ifdef CONFIG_NETFILTER - if (!skb_dst(skb)->xfrm) { + if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(skb); } - - IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; #endif - skb->protocol = htons(ETH_P_IP); - return xfrm_output(skb); + return x->outer_mode->afinfo->output_finish(skb); } int xfrm4_output(struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct xfrm_state *x = dst->xfrm; - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, - NULL, dst->dev, - x->outer_mode->afinfo->output_finish, + NULL, skb_dst(skb)->dev, __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index 7f7b243e8139..dccefa9d84cf 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -50,8 +50,12 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm4_protocol *handler; + struct xfrm4_protocol __rcu **head = proto_handlers(protocol); - for_each_protocol_rcu(*proto_handlers(protocol), handler) + if (!head) + return 0; + + for_each_protocol_rcu(*head, handler) if ((ret = handler->cb_handler(skb, err)) <= 0) return ret; @@ -64,15 +68,20 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, { int ret; struct xfrm4_protocol *handler; + struct xfrm4_protocol __rcu **head = proto_handlers(nexthdr); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); - for_each_protocol_rcu(*proto_handlers(nexthdr), handler) + if (!head) + goto out; + + for_each_protocol_rcu(*head, handler) if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) return ret; +out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); kfree_skb(skb); @@ -115,7 +124,7 @@ static int xfrm4_ah_rcv(struct sk_buff *skb) for_each_protocol_rcu(ah4_handlers, handler) if ((ret = handler->handler(skb)) != -EINVAL) - return ret;; + return ret; icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -208,6 +217,9 @@ int xfrm4_protocol_register(struct xfrm4_protocol *handler, int ret = -EEXIST; int priority = handler->priority; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm4_protocol_mutex); if (!rcu_dereference_protected(*proto_handlers(protocol), @@ -250,6 +262,9 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, struct xfrm4_protocol *t; int ret = -ENOENT; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm4_protocol_mutex); for (pprev = proto_handlers(protocol); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6c7fa0853fc7..0b239fc1816e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -108,11 +108,12 @@ static inline u32 cstamp_delta(unsigned long cstamp) } #ifdef CONFIG_SYSCTL -static void addrconf_sysctl_register(struct inet6_dev *idev); +static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); #else -static inline void addrconf_sysctl_register(struct inet6_dev *idev) +static inline int addrconf_sysctl_register(struct inet6_dev *idev) { + return 0; } static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) @@ -186,6 +187,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -222,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -275,19 +278,14 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) { int i; - if (snmp_mib_init((void __percpu **)idev->stats.ipv6, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + idev->stats.ipv6 = alloc_percpu(struct ipstats_mib); + if (!idev->stats.ipv6) goto err_ip; for_each_possible_cpu(i) { struct ipstats_mib *addrconf_stats; - addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); - u64_stats_init(&addrconf_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); + addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i); u64_stats_init(&addrconf_stats->syncp); -#endif } @@ -305,7 +303,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) err_icmpmsg: kfree(idev->stats.icmpv6dev); err_icmp: - snmp_mib_free((void __percpu **)idev->stats.ipv6); + free_percpu(idev->stats.ipv6); err_ip: return -ENOMEM; } @@ -313,16 +311,16 @@ err_ip: static struct inet6_dev *ipv6_add_dev(struct net_device *dev) { struct inet6_dev *ndev; + int err = -ENOMEM; ASSERT_RTNL(); if (dev->mtu < IPV6_MIN_MTU) - return NULL; + return ERR_PTR(-EINVAL); ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); - if (ndev == NULL) - return NULL; + return ERR_PTR(err); rwlock_init(&ndev->lock); ndev->dev = dev; @@ -335,7 +333,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (ndev->nd_parms == NULL) { kfree(ndev); - return NULL; + return ERR_PTR(err); } if (ndev->cnf.forwarding) dev_disable_lro(dev); @@ -349,17 +347,14 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) neigh_parms_release(&nd_tbl, ndev->nd_parms); dev_put(dev); kfree(ndev); - return NULL; + return ERR_PTR(err); } if (snmp6_register_dev(ndev) < 0) { ADBG(KERN_WARNING "%s: cannot create /proc/net/dev_snmp6/%s\n", __func__, dev->name); - neigh_parms_release(&nd_tbl, ndev->nd_parms); - ndev->dead = 1; - in6_dev_finish_destroy(ndev); - return NULL; + goto err_release; } /* One reference from device. We must do this before @@ -397,7 +392,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; - addrconf_sysctl_register(ndev); + err = addrconf_sysctl_register(ndev); + if (err) { + ipv6_mc_destroy_dev(ndev); + del_timer(&ndev->regen_timer); + goto err_release; + } /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); @@ -412,6 +412,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); return ndev; + +err_release: + neigh_parms_release(&nd_tbl, ndev->nd_parms); + ndev->dead = 1; + in6_dev_finish_destroy(ndev); + return ERR_PTR(err); } static struct inet6_dev *ipv6_find_idev(struct net_device *dev) @@ -423,7 +429,7 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) idev = __in6_dev_get(dev); if (!idev) { idev = ipv6_add_dev(dev); - if (!idev) + if (IS_ERR(idev)) return NULL; } @@ -2504,8 +2510,8 @@ static int inet6_addr_add(struct net *net, int ifindex, return PTR_ERR(ifp); } -static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx, - unsigned int plen) +static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, + const struct in6_addr *pfx, unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -2528,7 +2534,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p in6_ifa_hold(ifp); read_unlock_bh(&idev->lock); + if (!(ifp->flags & IFA_F_TEMPORARY) && + (ifa_flags & IFA_F_MANAGETEMPADDR)) + manage_tempaddrs(idev, ifp, 0, 0, false, + jiffies); ipv6_del_addr(ifp); + addrconf_verify_rtnl(); return 0; } } @@ -2568,7 +2579,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr, ireq.ifr6_prefixlen); rtnl_unlock(); return err; @@ -2728,9 +2739,25 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr } } +static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) +{ + if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + /* addrconf_add_linklocal also adds a prefix_route and we + * only need to care about prefix routes if ipv6_generate_eui64 + * couldn't generate one. + */ + if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) + addrconf_add_linklocal(idev, &addr); + else if (prefix_route) + addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + } +} + static void addrconf_dev_config(struct net_device *dev) { - struct in6_addr addr; struct inet6_dev *idev; ASSERT_RTNL(); @@ -2751,11 +2778,7 @@ static void addrconf_dev_config(struct net_device *dev) if (IS_ERR(idev)) return; - memset(&addr, 0, sizeof(struct in6_addr)); - addr.s6_addr32[0] = htonl(0xFE800000); - - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) - addrconf_add_linklocal(idev, &addr); + addrconf_addr_gen(idev, false); } #if IS_ENABLED(CONFIG_IPV6_SIT) @@ -2777,11 +2800,7 @@ static void addrconf_sit_config(struct net_device *dev) } if (dev->priv_flags & IFF_ISATAP) { - struct in6_addr addr; - - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) - addrconf_add_linklocal(idev, &addr); + addrconf_addr_gen(idev, false); return; } @@ -2796,7 +2815,6 @@ static void addrconf_sit_config(struct net_device *dev) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; - struct in6_addr addr; ASSERT_RTNL(); @@ -2805,26 +2823,10 @@ static void addrconf_gre_config(struct net_device *dev) return; } - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) - addrconf_add_linklocal(idev, &addr); - else - addrconf_prefix_route(&addr, 64, dev, 0, 0); + addrconf_addr_gen(idev, true); } #endif -static inline int -ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) -{ - struct in6_addr lladdr; - - if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { - addrconf_add_linklocal(idev, &lladdr); - return 0; - } - return -1; -} - static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { @@ -2837,8 +2839,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); - if (!idev) - return notifier_from_errno(-ENOMEM); + if (IS_ERR(idev)) + return notifier_from_errno(PTR_ERR(idev)); } break; @@ -2858,7 +2860,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) idev = ipv6_add_dev(dev); - if (idev) { + if (!IS_ERR_OR_NULL(idev)) { idev->if_flags |= IF_READY; run_pending = 1; } @@ -2901,7 +2903,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, break; } - if (idev) { + if (!IS_ERR_OR_NULL(idev)) { if (run_pending) addrconf_dad_run(idev); @@ -2936,7 +2938,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); - if (idev) + if (!IS_ERR(idev)) break; } @@ -2957,10 +2959,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) { snmp6_unregister_dev(idev); addrconf_sysctl_unregister(idev); - addrconf_sysctl_register(idev); - err = snmp6_register_dev(idev); + err = addrconf_sysctl_register(idev); if (err) return notifier_from_errno(err); + err = snmp6_register_dev(idev); + if (err) { + addrconf_sysctl_unregister(idev); + return notifier_from_errno(err); + } } break; @@ -3743,6 +3749,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx, *peer_pfx; + u32 ifa_flags; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3754,7 +3761,13 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) if (pfx == NULL) return -EINVAL; - return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); + ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; + + /* We ignore other flags so far. */ + ifa_flags &= IFA_F_MANAGETEMPADDR; + + return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx, + ifm->ifa_prefixlen); } static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, @@ -4326,6 +4339,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; + array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; } static inline size_t inet6_ifla6_size(void) @@ -4363,7 +4377,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, memset(&stats[items], 0, pad); } -static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, +static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, int items, int bytes, size_t syncpoff) { int i; @@ -4383,7 +4397,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, { switch (attrtype) { case IFLA_INET6_STATS: - __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, + __snmp6_fill_stats64(stats, idev->stats.ipv6, IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: @@ -4425,6 +4439,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); if (nla == NULL) goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode)) + goto nla_put_failure; + read_lock_bh(&idev->lock); memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); read_unlock_bh(&idev->lock); @@ -4529,8 +4547,21 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) BUG(); - if (tb[IFLA_INET6_TOKEN]) + if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + if (err) + return err; + } + + if (tb[IFLA_INET6_ADDR_GEN_MODE]) { + u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); + + if (mode != IN6_ADDR_GEN_MODE_EUI64 && + mode != IN6_ADDR_GEN_MODE_NONE) + return -EINVAL; + idev->addr_gen_mode = mode; + err = 0; + } return err; } @@ -5173,6 +5204,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec }, { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, @@ -5223,12 +5261,23 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) kfree(t); } -static void addrconf_sysctl_register(struct inet6_dev *idev) +static int addrconf_sysctl_register(struct inet6_dev *idev) { - neigh_sysctl_register(idev->dev, idev->nd_parms, - &ndisc_ifinfo_sysctl_change); - __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, - idev, &idev->cnf); + int err; + + if (!sysctl_dev_name_is_allowed(idev->dev->name)) + return -EINVAL; + + err = neigh_sysctl_register(idev->dev, idev->nd_parms, + &ndisc_ifinfo_sysctl_change); + if (err) + return err; + err = __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, + idev, &idev->cnf); + if (err) + neigh_sysctl_unregister(idev->nd_parms); + + return err; } static void addrconf_sysctl_unregister(struct inet6_dev *idev) @@ -5313,6 +5362,7 @@ static struct rtnl_af_ops inet6_ops = { int __init addrconf_init(void) { + struct inet6_dev *idev; int i, err; err = ipv6_addr_label_init(); @@ -5351,11 +5401,12 @@ int __init addrconf_init(void) * device and it being up should be removed. */ rtnl_lock(); - if (!ipv6_add_dev(init_net.loopback_dev)) - err = -ENOMEM; + idev = ipv6_add_dev(init_net.loopback_dev); rtnl_unlock(); - if (err) + if (IS_ERR(idev)) { + err = PTR_ERR(idev); goto errlo; + } for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_addr_lst[i]); diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 4c11cbcf8308..e6960457f625 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -123,7 +123,7 @@ static void snmp6_free_dev(struct inet6_dev *idev) { kfree(idev->stats.icmpv6msgdev); kfree(idev->stats.icmpv6dev); - snmp_mib_free((void __percpu **)idev->stats.ipv6); + free_percpu(idev->stats.ipv6); } /* Nobody refers to this device, we may destroy it. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d935889f1008..2daa3a133e49 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -106,7 +106,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; - char answer_no_check; int try_loading_module = 0; int err; @@ -162,7 +161,6 @@ lookup_protocol: sock->ops = answer->ops; answer_prot = answer->prot; - answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); @@ -176,7 +174,6 @@ lookup_protocol: sock_init_data(sock, sk); err = 0; - sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; @@ -200,7 +197,7 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = net->ipv6.sysctl.bindv6only; + sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. @@ -297,7 +294,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Binding to v4-mapped address on a v6-only socket * makes no sense */ - if (np->ipv6only) { + if (sk->sk_ipv6only) { err = -EINVAL; goto out; } @@ -374,7 +371,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (addr_type != IPV6_ADDR_MAPPED) - np->ipv6only = 1; + sk->sk_ipv6only = 1; } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; @@ -715,33 +712,25 @@ static int __net_init ipv6_init_mibs(struct net *net) { int i; - if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); + if (!net->mib.udp_stats_in6) return -ENOMEM; - if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); + if (!net->mib.udplite_stats_in6) goto err_udplite_mib; - if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); + if (!net->mib.ipv6_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet6_stats; - af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i); u64_stats_init(&af_inet6_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); - u64_stats_init(&af_inet6_stats->syncp); -#endif } - if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, - sizeof(struct icmpv6_mib), - __alignof__(struct icmpv6_mib)) < 0) + net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); + if (!net->mib.icmpv6_statistics) goto err_icmp_mib; net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), GFP_KERNEL); @@ -750,22 +739,22 @@ static int __net_init ipv6_init_mibs(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + free_percpu(net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); + free_percpu(net->mib.ipv6_statistics); err_ip_mib: - snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); + free_percpu(net->mib.udplite_stats_in6); err_udplite_mib: - snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); + free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } static void ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); - snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); - snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); - snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + free_percpu(net->mib.udp_stats_in6); + free_percpu(net->mib.udplite_stats_in6); + free_percpu(net->mib.ipv6_statistics); + free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); } @@ -776,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; + net->ipv6.sysctl.auto_flowlabels = 0; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c3bf2d2e519e..2753319524f1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -199,6 +199,7 @@ ipv4_connected: NULL); sk->sk_state = TCP_ESTABLISHED; + ip6_set_txhash(sk); out: fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 7b326529e6a2..06ba3e58320b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) @@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) fl6.daddr = hdr->saddr; if (saddr) fl6.saddr = *saddr; + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -493,12 +496,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); msg.skb = skb; msg.offset = skb_network_offset(skb); @@ -556,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) int err = 0; int hlimit; u8 tclass; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; @@ -574,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.saddr = *saddr; fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) @@ -593,12 +594,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); idev = __in6_dev_get(skb->dev); @@ -630,9 +626,10 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) int inner_offset; __be16 frag_off; u8 nexthdr; + struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return; + goto out; nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr; if (ipv6_ext_hdr(nexthdr)) { @@ -640,14 +637,14 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (inner_offset<0) - return; + goto out; } else { inner_offset = sizeof(struct ipv6hdr); } /* Checkin header including 8 bytes of inner protocol header. */ if (!pskb_may_pull(skb, inner_offset+8)) - return; + goto out; /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. Without this we will not able f.e. to make source routed @@ -656,13 +653,15 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) --ANK (980726) */ - rcu_read_lock(); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); - rcu_read_unlock(); raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); + return; + +out: + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); } /* @@ -702,22 +701,11 @@ static int icmpv6_rcv(struct sk_buff *skb) saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; - /* Perform checksum. */ - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, - skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, - IPPROTO_ICMPV6, 0)); - if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ICMPv6 checksum failed [%pI6c > %pI6c]\n", - saddr, daddr); - goto csum_error; - } + if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", + saddr, daddr); + goto csum_error; } if (!pskb_pull(skb, sizeof(*hdr))) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d4ade34ab375..a245e5ddffbd 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, final_p = fl6_update_dst(fl6, np->opt, &final); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; - fl6->flowi6_mark = sk->sk_mark; + fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(fl6)); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index ee7a97f510cb..9a4d7322fb22 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -75,25 +75,50 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) return err; } - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", - &ipv6_hdr(skb)->saddr, ntohs(uh->source), - &ipv6_hdr(skb)->daddr, ntohs(uh->dest)); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; + /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) + * we accept a checksum of zero here. When we find the socket + * for the UDP packet we'll check if that socket allows zero checksum + * for IPv6 (set by socket option). + */ + return skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); +} +EXPORT_SYMBOL(udp6_csum_init); + +/* Function to set UDP checksum for an IPv6 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. + */ +void udp6_set_csum(bool nocheck, struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - return 0; + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v6_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } } -EXPORT_SYMBOL(udp6_csum_init); +EXPORT_SYMBOL(udp6_set_csum); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 34e0ded5c14b..cb4459bd1d29 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -71,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock); #define FWS_INIT FWS_L #endif -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, - struct rt6_info *rt); +static void fib6_prune_clones(struct net *net, struct fib6_node *fn); static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); static int fib6_walk(struct fib6_walker_t *w); @@ -941,7 +940,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) - fib6_prune_clones(info->nl_net, pn, rt); + fib6_prune_clones(info->nl_net, pn); } out: @@ -1375,7 +1374,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) pn = pn->parent; } #endif - fib6_prune_clones(info->nl_net, pn, rt); + fib6_prune_clones(info->nl_net, pn); } /* @@ -1459,7 +1458,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) if (w->skip) { w->skip--; - continue; + goto skip; } err = w->func(w); @@ -1469,6 +1468,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->count++; continue; } +skip: w->state = FWS_U; case FWS_U: if (fn == w->root) @@ -1600,10 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg) return 0; } -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, - struct rt6_info *rt) +static void fib6_prune_clones(struct net *net, struct fib6_node *fn) { - fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); + fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL); } /* diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 0961b5ef866d..4052694c6f2c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -26,7 +26,6 @@ #include <net/sock.h> #include <net/ipv6.h> -#include <net/addrconf.h> #include <net/rawv6.h> #include <net/transp_v6.h> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9d921462b57f..5f19dfbc4c6a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -72,6 +72,7 @@ struct ip6gre_net { }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; static int ip6gre_tunnel_init(struct net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); @@ -321,7 +322,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, else strcpy(name, "ip6gre%d"); - dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6gre_tunnel_setup); if (!dev) return NULL; @@ -353,10 +355,10 @@ failed_free: static void ip6gre_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); - ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + ip6gre_tunnel_unlink(ign, t); dev_put(dev); } @@ -467,17 +469,7 @@ static int ip6gre_rcv(struct sk_buff *skb) goto drop; if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } + csum = skb_checksum_simple_validate(skb); offset += 4; } if (flags&GRE_KEY) { @@ -611,8 +603,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { - struct net *net = dev_net(dev); struct ip6_tnl *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ unsigned int max_headroom = 0; /* The extra header space needed */ @@ -732,7 +724,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. */ ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -979,7 +972,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); - struct rt6_info *rt = rt6_lookup(dev_net(dev), + struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, strict); @@ -1063,13 +1056,12 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, int err = 0; struct ip6_tnl_parm2 p; struct __ip6_tnl_parm p1; - struct ip6_tnl *t; - struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); switch (cmd) { case SIOCGETTUNNEL: - t = NULL; if (dev == ign->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; @@ -1077,9 +1069,9 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); + if (t == NULL) + t = netdev_priv(dev); } - if (t == NULL) - t = netdev_priv(dev); memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) @@ -1184,7 +1176,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); + ip6_flow_hdr(ipv6h, 0, + ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; @@ -1242,7 +1236,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->iflink = 0; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } @@ -1297,11 +1290,17 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, - struct list_head *head) +static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) { + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &ip6gre_link_ops || + dev->rtnl_link_ops == &ip6gre_tap_ops) + unregister_netdevice_queue(dev, head); + for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1310,7 +1309,12 @@ static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, t = rtnl_dereference(ign->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1323,12 +1327,18 @@ static int __net_init ip6gre_init_net(struct net *net) int err; ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", - ip6gre_tunnel_setup); + NET_NAME_UNKNOWN, + ip6gre_tunnel_setup); if (!ign->fb_tunnel_dev) { err = -ENOMEM; goto err_alloc_dev; } dev_net_set(ign->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; @@ -1349,12 +1359,10 @@ err_alloc_dev: static void __net_exit ip6gre_exit_net(struct net *net) { - struct ip6gre_net *ign; LIST_HEAD(list); - ign = net_generic(net, ip6gre_net_id); rtnl_lock(); - ip6gre_destroy_tunnels(ign, &list); + ip6gre_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } @@ -1531,15 +1539,14 @@ out: static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip6_tnl *t, *nt; - struct net *net = dev_net(dev); + struct ip6_tnl *t, *nt = netdev_priv(dev); + struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct __ip6_tnl_parm p; if (dev == ign->fb_tunnel_dev) return -EINVAL; - nt = netdev_priv(dev); ip6gre_netlink_parms(data, &p); t = ip6gre_tunnel_locate(net, &p, 0); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 59f95affceb0..65eda2a8af48 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -97,9 +97,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) @@ -196,7 +198,6 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, unsigned int off; u16 flush = 1; int proto; - __wsum csum; off = skb_gro_offset(skb); hlen = off + sizeof(*iph); @@ -264,13 +265,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(skb)->flush |= flush; - csum = skb->csum; - skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + skb_gro_postpull_rcsum(skb, iph, nlen); pp = ops->callbacks.gro_receive(head, skb); - skb->csum = csum; - out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 40e7581374f7..315a55d66079 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - ip6_flow_hdr(hdr, tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -219,7 +220,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->mark = sk->sk_mark; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { + if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, @@ -344,12 +345,16 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; + /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; + if (skb->ignore_df) + return false; + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) return false; @@ -533,6 +538,20 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + u32 hash, id; + + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); + hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); + + id = ip_idents_reserve(hash, 1); + fhdr->identification = htonl(id); +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; @@ -555,7 +574,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu) || + if (unlikely(!skb->ignore_df && skb->len > mtu) || (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) @@ -784,8 +803,8 @@ slow_path: /* * Copy a block of the IP datagram. */ - if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) - BUG(); + BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag), + len)); left -= len; fh->frag_off = htons(offset); @@ -1138,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int err; int offset = 0; __u8 tx_flags = 0; + u32 tskey = 0; if (flags&MSG_PROBE) return 0; @@ -1225,12 +1245,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, unsigned int maxnonfragsize, headersize; headersize = sizeof(struct ipv6hdr) + - (opt ? opt->tot_len : 0) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + (dst_allfrag(&rt->dst) ? sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (ip6_sk_local_df(sk)) + if (ip6_sk_ignore_df(sk)) maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; else maxnonfragsize = mtu; @@ -1253,9 +1273,12 @@ emsgsize: } } - /* For UDP, check if TX timestamp is enabled */ - if (sk->sk_type == SOCK_DGRAM) + if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { sock_tx_timestamp(sk, &tx_flags); + if (tx_flags & SKBTX_ANY_SW_TSTAMP && + sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + tskey = sk->sk_tskey++; + } /* * Let's try using as much space as possible. @@ -1363,12 +1386,6 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; - else { - /* Only the initial fragment - * is time stamped. - */ - tx_flags = 0; - } } if (skb == NULL) goto error; @@ -1382,8 +1399,11 @@ alloc_new_skb: skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + dst_exthdrlen); - if (sk->sk_type == SOCK_DGRAM) - skb_shinfo(skb)->tx_flags = tx_flags; + /* Only the initial fragment is time stamped */ + skb_shinfo(skb)->tx_flags = tx_flags; + tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; /* * Find where to start putting bytes @@ -1540,7 +1560,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - skb->local_df = ip6_sk_local_df(sk); + skb->ignore_df = ip6_sk_ignore_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); @@ -1553,7 +1573,9 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, np->cork.tclass, + ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b05b609f69d1..f9de5a695072 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -61,6 +61,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG @@ -314,7 +315,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) else sprintf(name, "ip6tnl%%d"); - dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6_tnl_dev_setup); if (dev == NULL) goto failed; @@ -1045,7 +1047,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1557,7 +1560,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) { u8 proto; - if (!data) + if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); @@ -1771,7 +1774,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", - ip6_tnl_dev_setup); + NET_NAME_UNKNOWN, ip6_tnl_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index b7c0f827140b..7f52fd9fa7b0 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -204,7 +204,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p else sprintf(name, "ip6_vti%%d"); - dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); if (dev == NULL) goto failed; @@ -511,6 +511,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __be32 spi; + __u32 mark; struct xfrm_state *x; struct ip6_tnl *t; struct ip_esp_hdr *esph; @@ -524,6 +525,8 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!t) return -1; + mark = be32_to_cpu(t->parms.o_key); + switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data + offset); @@ -545,7 +548,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != NDISC_REDIRECT) return 0; - x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET6); if (!x) return 0; @@ -792,15 +795,12 @@ static const struct net_device_ops vti6_netdev_ops = { **/ static void vti6_dev_setup(struct net_device *dev) { - struct ip6_tnl *t; - dev->netdev_ops = &vti6_netdev_ops; dev->destructor = vti6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); dev->mtu = ETH_DATA_LEN; - t = netdev_priv(dev); dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; @@ -1020,7 +1020,7 @@ static int __net_init vti6_init_net(struct net *net) err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0", - vti6_dev_setup); + NET_NAME_UNKNOWN, vti6_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; @@ -1089,39 +1089,26 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { **/ static int __init vti6_tunnel_init(void) { - int err; + const char *msg; + int err; + msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) - goto out_pernet; + goto pernet_dev_failed; + msg = "tunnel protocols"; err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); - if (err < 0) { - unregister_pernet_device(&vti6_net_ops); - pr_err("%s: can't register vti6 protocol\n", __func__); - - goto out; - } - + if (err < 0) + goto xfrm_proto_esp_failed; err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); - if (err < 0) { - xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); - unregister_pernet_device(&vti6_net_ops); - pr_err("%s: can't register vti6 protocol\n", __func__); - - goto out; - } - + if (err < 0) + goto xfrm_proto_ah_failed; err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); - if (err < 0) { - xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); - xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); - unregister_pernet_device(&vti6_net_ops); - pr_err("%s: can't register vti6 protocol\n", __func__); - - goto out; - } + if (err < 0) + goto xfrm_proto_comp_failed; + msg = "netlink interface"; err = rtnl_link_register(&vti6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1130,11 +1117,14 @@ static int __init vti6_tunnel_init(void) rtnl_link_failed: xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); +xfrm_proto_comp_failed: xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); +xfrm_proto_ah_failed: xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); -out: +xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); -out_pernet: +pernet_dev_failed: + pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1144,13 +1134,9 @@ out_pernet: static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); - if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP)) - pr_info("%s: can't deregister protocol\n", __func__); - if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH)) - pr_info("%s: can't deregister protocol\n", __func__); - if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP)) - pr_info("%s: can't deregister protocol\n", __func__); - + xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); + xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8659067da28e..f9a3fd320d1d 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -744,7 +744,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) else sprintf(name, "pim6reg%u", mrt->id); - dev = alloc_netdev(0, name, reg_vif_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (dev == NULL) return NULL; @@ -1633,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; struct flowi6 fl6 = { - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_oif = skb->dev->ifindex, .flowi6_mark = skb->mark, }; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index edb58aff4ae7..0c289982796d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -235,7 +235,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int) || inet_sk(sk)->inet_num) goto e_inval; - np->ipv6only = valbool; + sk->sk_ipv6only = valbool; retv = 0; break; @@ -834,6 +834,10 @@ pref_skip_coa: np->dontfrag = valbool; retv = 0; break; + case IPV6_AUTOFLOWLABEL: + np->autoflowlabel = valbool; + retv = 0; + break; } release_sock(sk); @@ -1058,7 +1062,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_V6ONLY: - val = np->ipv6only; + val = sk->sk_ipv6only; break; case IPV6_RECVPKTINFO: @@ -1158,7 +1162,6 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; - break; } case IPV6_TRANSPARENT: @@ -1273,6 +1276,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->dontfrag; break; + case IPV6_AUTOFLOWLABEL: + val = np->autoflowlabel; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 08b367c6b9cf..617f0958e164 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1301,8 +1301,17 @@ int igmp6_event_query(struct sk_buff *skb) len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); len -= skb_network_header_len(skb); - /* Drop queries with not link local source */ - if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + /* RFC3810 6.2 + * Upon reception of an MLD message that contains a Query, the node + * checks if the source address of the message is a valid link-local + * address, if the Hop Limit is set to 1, and if the Router Alert + * option is present in the Hop-By-Hop Options header of the IPv6 + * packet. If any of these checks fails, the packet is dropped. + */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) || + ipv6_hdr(skb)->hop_limit != 1 || + !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) || + IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD)) return -EINVAL; idev = __in6_dev_get(skb->dev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 09a22f4f36c9..339078f95d1b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -851,7 +851,7 @@ out: static void ndisc_recv_na(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); - const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; + struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + @@ -944,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* * Change: router to host */ - struct rt6_info *rt; - rt = rt6_get_dflt_router(saddr, dev); - if (rt) - ip6_del_rt(rt); + rt6_clean_tohost(dev_net(dev), saddr); } out: @@ -1073,6 +1070,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); + ND_PRINTK(2, info, + "RA: %s, dev: %s\n", + __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); return; @@ -1105,13 +1105,21 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - if (!ipv6_accept_ra(in6_dev)) + if (!ipv6_accept_ra(in6_dev)) { + ND_PRINTK(2, info, + "RA: %s, did not accept ra for dev: %s\n", + __func__, skb->dev->name); goto skip_linkparms; + } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ - if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) + if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { + ND_PRINTK(2, info, + "RA: %s, nodetype is NODEFAULT, dev: %s\n", + __func__, skb->dev->name); goto skip_linkparms; + } #endif if (in6_dev->if_flags & IF_RS_SENT) { @@ -1133,11 +1141,24 @@ static void ndisc_router_discovery(struct sk_buff *skb) (ra_msg->icmph.icmp6_addrconf_other ? IF_RA_OTHERCONF : 0); - if (!in6_dev->cnf.accept_ra_defrtr) + if (!in6_dev->cnf.accept_ra_defrtr) { + ND_PRINTK(2, info, + "RA: %s, defrtr is false for dev: %s\n", + __func__, skb->dev->name); goto skip_defrtr; + } - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + /* Do not accept RA with source-addr found on local machine unless + * accept_ra_from_local is set to true. + */ + if (!in6_dev->cnf.accept_ra_from_local && + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0)) { + ND_PRINTK(2, info, + "RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; + } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); @@ -1166,8 +1187,10 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = NULL; } + ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", + rt, lifetime, skb->dev->name); if (rt == NULL && lifetime) { - ND_PRINTK(3, dbg, "RA: adding default router\n"); + ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); if (rt == NULL) { @@ -1263,12 +1286,22 @@ skip_linkparms: NEIGH_UPDATE_F_ISROUTER); } - if (!ipv6_accept_ra(in6_dev)) + if (!ipv6_accept_ra(in6_dev)) { + ND_PRINTK(2, info, + "RA: %s, accept_ra is false for dev: %s\n", + __func__, skb->dev->name); goto out; + } #ifdef CONFIG_IPV6_ROUTE_INFO - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + if (!in6_dev->cnf.accept_ra_from_local && + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0)) { + ND_PRINTK(2, info, + "RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; + } if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; @@ -1296,8 +1329,12 @@ skip_routeinfo: #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ - if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) + if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { + ND_PRINTK(2, info, + "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", + __func__, skb->dev->name); goto out; + } #endif if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) { @@ -1731,7 +1768,7 @@ int __init ndisc_init(void) #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, - &ndisc_ifinfo_sysctl_change); + ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; out: diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 95f3f1da0d7f..d38e6a8d8b9f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb) .daddr = iph->daddr, .saddr = iph->saddr, }; + int err; dst = ip6_route_output(net, skb->sk, &fl6); - if (dst->error) { + err = dst->error; + if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); - return dst->error; + return err; } /* Drop old route. */ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4bff1f297e39..ac93df16f5af 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -55,6 +55,11 @@ config NFT_REJECT_IPV6 default NFT_REJECT tristate +config NF_LOG_IPV6 + tristate "IPv6 packet logging" + depends on NETFILTER_ADVANCED + select NF_LOG_COMMON + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 70d3dd66f2cd..c0b263104ed2 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,6 +23,9 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +# logging +obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o + # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 54bd9790603f..8b147440fbdc 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -94,7 +94,6 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) break; default: return false; - break; } nexthdr = hp->nexthdr; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index e0983f3648a6..790e0c6b19e1 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, struct ipv6hdr *iph = ipv6_hdr(skb); bool ret = false; struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, .daddr = iph->saddr, diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 84c7f33d0cf8..387d8b8fc18d 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -90,17 +90,9 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, if (nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 767ab8da8218..6f187c8d8a1b 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -50,6 +50,7 @@ #include <linux/module.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +static const char nf_frags_cache_name[] = "nf-frags"; struct nf_ct_frag6_skb_cb { @@ -63,6 +64,8 @@ struct nf_ct_frag6_skb_cb static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL +static int zero; + static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", @@ -76,14 +79,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.nf_frag.frags.high_thresh }, { .procname = "nf_conntrack_frag6_high_thresh", .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.nf_frag.frags.low_thresh }, { } }; @@ -102,7 +108,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net) table[0].data = &net->nf_frag.frags.timeout; table[1].data = &net->nf_frag.frags.low_thresh; + table[1].extra2 = &net->nf_frag.frags.high_thresh; table[2].data = &net->nf_frag.frags.high_thresh; + table[2].extra1 = &net->nf_frag.frags.low_thresh; + table[2].extra2 = &init_net.nf_frag.frags.high_thresh; } hdr = register_net_sysctl(net, "net/netfilter", table); @@ -147,16 +156,13 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, const struct in6_addr *daddr) { - u32 c; - net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); - c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, nf_frags.rnd); - return c & (INETFRAGS_HASHSZ - 1); + return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, nf_frags.rnd); } -static unsigned int nf_hashfn(struct inet_frag_queue *q) +static unsigned int nf_hashfn(const struct inet_frag_queue *q) { const struct frag_queue *nq; @@ -196,7 +202,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.dst = dst; arg.ecn = ecn; - read_lock_bh(&nf_frags.lock); + local_bh_disable(); hash = nf_hash_frag(id, src, dst); q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); @@ -217,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, int offset, end; u8 ecn; - if (fq->q.last_in & INET_FRAG_COMPLETE) { + if (fq->q.flags & INET_FRAG_COMPLETE) { pr_debug("Already completed\n"); goto err; } @@ -248,11 +254,11 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) { + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) { pr_debug("already received last fragment\n"); goto err; } - fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. @@ -267,7 +273,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & INET_FRAG_LAST_IN) { + if (fq->q.flags & INET_FRAG_LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } @@ -349,10 +355,9 @@ found: */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= INET_FRAG_FIRST_IN; + fq->q.flags |= INET_FRAG_FIRST_IN; } - inet_frag_lru_move(&fq->q); return 0; discard_fq: @@ -451,7 +456,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } sub_frag_mem_limit(&fq->q, head->truesize); - head->local_df = 1; + head->ignore_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; @@ -597,10 +602,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - local_bh_disable(); - inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); - local_bh_enable(); - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq == NULL) { @@ -617,7 +618,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) goto ret_orig; } - if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) @@ -677,13 +678,15 @@ int nf_ct_frag6_init(void) nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; - nf_frags.secret_interval = 10 * 60 * HZ; - inet_frags_init(&nf_frags); - + nf_frags.frags_cache_name = nf_frags_cache_name; + ret = inet_frags_init(&nf_frags); + if (ret) + goto out; ret = register_pernet_subsys(&nf_ct_net_ops); if (ret) inet_frags_fini(&nf_frags); +out: return ret; } diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c new file mode 100644 index 000000000000..7b17a0be93e7 --- /dev/null +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -0,0 +1,417 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/xt_LOG.h> +#include <net/netfilter/nf_log.h> + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 5, + .logflags = NF_LOG_MASK, + }, + }, +}; + +/* One level of recursion won't kill us */ +static void dump_ipv6_packet(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int ip6hoff, + int recurse) +{ + u_int8_t currenthdr; + int fragment; + struct ipv6hdr _ip6h; + const struct ipv6hdr *ih; + unsigned int ptr; + unsigned int hdrlen = 0; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; + + ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); + if (ih == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ + nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); + + /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ + nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", + ntohs(ih->payload_len) + sizeof(struct ipv6hdr), + (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, + ih->hop_limit, + (ntohl(*(__be32 *)ih) & 0x000fffff)); + + fragment = 0; + ptr = ip6hoff + sizeof(struct ipv6hdr); + currenthdr = ih->nexthdr; + while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { + struct ipv6_opt_hdr _hdr; + const struct ipv6_opt_hdr *hp; + + hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); + if (hp == NULL) { + nf_log_buf_add(m, "TRUNCATED"); + return; + } + + /* Max length: 48 "OPT (...) " */ + if (logflags & XT_LOG_IPOPT) + nf_log_buf_add(m, "OPT ( "); + + switch (currenthdr) { + case IPPROTO_FRAGMENT: { + struct frag_hdr _fhdr; + const struct frag_hdr *fh; + + nf_log_buf_add(m, "FRAG:"); + fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), + &_fhdr); + if (fh == NULL) { + nf_log_buf_add(m, "TRUNCATED "); + return; + } + + /* Max length: 6 "65535 " */ + nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); + + /* Max length: 11 "INCOMPLETE " */ + if (fh->frag_off & htons(0x0001)) + nf_log_buf_add(m, "INCOMPLETE "); + + nf_log_buf_add(m, "ID:%08x ", + ntohl(fh->identification)); + + if (ntohs(fh->frag_off) & 0xFFF8) + fragment = 1; + + hdrlen = 8; + + break; + } + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_HOPOPTS: + if (fragment) { + if (logflags & XT_LOG_IPOPT) + nf_log_buf_add(m, ")"); + return; + } + hdrlen = ipv6_optlen(hp); + break; + /* Max Length */ + case IPPROTO_AH: + if (logflags & XT_LOG_IPOPT) { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + /* Max length: 3 "AH " */ + nf_log_buf_add(m, "AH "); + + if (fragment) { + nf_log_buf_add(m, ")"); + return; + } + + ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), + &_ahdr); + if (ah == NULL) { + /* + * Max length: 26 "INCOMPLETE [65535 + * bytes] )" + */ + nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 15 "SPI=0xF1234567 */ + nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); + + } + + hdrlen = (hp->hdrlen+2)<<2; + break; + case IPPROTO_ESP: + if (logflags & XT_LOG_IPOPT) { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 4 "ESP " */ + nf_log_buf_add(m, "ESP "); + + if (fragment) { + nf_log_buf_add(m, ")"); + return; + } + + /* + * Max length: 26 "INCOMPLETE [65535 bytes] )" + */ + eh = skb_header_pointer(skb, ptr, sizeof(_esph), + &_esph); + if (eh == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 16 "SPI=0xF1234567 )" */ + nf_log_buf_add(m, "SPI=0x%x )", + ntohl(eh->spi)); + } + return; + default: + /* Max length: 20 "Unknown Ext Hdr 255" */ + nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); + return; + } + if (logflags & XT_LOG_IPOPT) + nf_log_buf_add(m, ") "); + + currenthdr = hp->nexthdr; + ptr += hdrlen; + } + + switch (currenthdr) { + case IPPROTO_TCP: + if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment, + ptr, logflags)) + return; + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr)) + return; + break; + case IPPROTO_ICMPV6: { + struct icmp6hdr _icmp6h; + const struct icmp6hdr *ic; + + /* Max length: 13 "PROTO=ICMPv6 " */ + nf_log_buf_add(m, "PROTO=ICMPv6 "); + + if (fragment) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); + if (ic == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", + skb->len - ptr); + return; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + nf_log_buf_add(m, "TYPE=%u CODE=%u ", + ic->icmp6_type, ic->icmp6_code); + + switch (ic->icmp6_type) { + case ICMPV6_ECHO_REQUEST: + case ICMPV6_ECHO_REPLY: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + nf_log_buf_add(m, "ID=%u SEQ=%u ", + ntohs(ic->icmp6_identifier), + ntohs(ic->icmp6_sequence)); + break; + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + break; + + case ICMPV6_PARAMPROB: + /* Max length: 17 "POINTER=ffffffff " */ + nf_log_buf_add(m, "POINTER=%08x ", + ntohl(ic->icmp6_pointer)); + /* Fall through */ + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + /* Max length: 3+maxlen */ + if (recurse) { + nf_log_buf_add(m, "["); + dump_ipv6_packet(m, info, skb, + ptr + sizeof(_icmp6h), 0); + nf_log_buf_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) { + nf_log_buf_add(m, "MTU=%u ", + ntohl(ic->icmp6_mtu)); + } + } + break; + } + /* Max length: 10 "PROTO=255 " */ + default: + nf_log_buf_add(m, "PROTO=%u ", currenthdr); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & XT_LOG_UID) && recurse) + nf_log_dump_sk_uid_gid(m, skb->sk); + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (recurse && skb->mark) + nf_log_buf_add(m, "MARK=0x%x ", skb->mark); +} + +static void dump_ipv6_mac_header(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & XT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + nf_log_buf_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int len = dev->hard_header_len; + unsigned int i; + + if (dev->type == ARPHRD_SIT) { + p -= ETH_HLEN; + + if (p < skb->head) + p = NULL; + } + + if (p != NULL) { + nf_log_buf_add(m, "%02x", *p++); + for (i = 1; i < len; i++) + nf_log_buf_add(m, ":%02x", *p++); + } + nf_log_buf_add(m, " "); + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, + &iph->daddr); + } + } else { + nf_log_buf_add(m, " "); + } +} + +static void nf_log_ip6_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, + loginfo, prefix); + + if (in != NULL) + dump_ipv6_mac_header(m, loginfo, skb); + + dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1); + + nf_log_buf_close(m); +} + +static struct nf_logger nf_ip6_logger __read_mostly = { + .name = "nf_log_ipv6", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_ip6_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_ipv6_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); + return 0; +} + +static void __net_exit nf_log_ipv6_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_ip6_logger); +} + +static struct pernet_operations nf_log_ipv6_net_ops = { + .init = nf_log_ipv6_net_init, + .exit = nf_log_ipv6_net_exit, +}; + +static int __init nf_log_ipv6_init(void) +{ + int ret; + + ret = register_pernet_subsys(&nf_log_ipv6_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + return 0; +} + +static void __exit nf_log_ipv6_exit(void) +{ + unregister_pernet_subsys(&nf_log_ipv6_net_ops); + nf_log_unregister(&nf_ip6_logger); +} + +module_init(nf_log_ipv6_init); +module_exit(nf_log_ipv6_exit); + +MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(AF_INET6, 0); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index abfe75a2e316..fc8e49b2ff3e 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -158,6 +158,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, htons(oldlen), htons(datalen), 1); } +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range) { @@ -175,6 +176,7 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], return 0; } +#endif static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { .l3proto = NFPROTO_IPV6, @@ -183,7 +185,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { .manip_pkt = nf_nat_ipv6_manip_pkt, .csum_update = nf_nat_ipv6_csum_update, .csum_recalc = nf_nat_ipv6_csum_recalc, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, +#endif #ifdef CONFIG_XFRM .decode_session = nf_nat_ipv6_decode_session, #endif diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 9c3297a768fd..d189fcb437fe 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -47,15 +47,9 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, if (ct == NULL || nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (nat == NULL) { - /* Conntrack module was loaded late, can't add extension. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) - return NF_ACCEPT; - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 6313abd53c9d..5ec867e4a8b7 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,37 +8,6 @@ #include <net/addrconf.h> #include <net/secure_seq.h> -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static atomic_t ipv6_fragmentation_id; - struct in6_addr addr; - int old, new; - -#if IS_ENABLED(CONFIG_IPV6) - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } -#endif - do { - old = atomic_read(&ipv6_fragmentation_id); - new = old + 1; - if (!new) - new = 1; - } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); - - addr = rt->rt6i_dst.addr; - addr.s6_addr32[0] ^= (__force __be32)new; - fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32)); -} -EXPORT_SYMBOL(ipv6_select_ident); - int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); @@ -109,6 +78,7 @@ int __ip6_local_out(struct sk_buff *skb) if (len > IPV6_MAXPLEN) len = 0; ipv6_hdr(skb)->payload_len = htons(len); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bda74291c3e0..5b7a1ed2aba9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -51,7 +51,6 @@ static struct inet_protosw pingv6_protosw = { .protocol = IPPROTO_ICMPV6, .prot = &pingv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; @@ -168,12 +167,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.wcheck = 0; pfh.family = AF_INET6; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 091d066a57b3..2d6f860e5c1e 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -33,6 +33,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; + unsigned int frag_mem = ip6_frag_mem(net); seq_printf(seq, "TCP6: inuse %d\n", sock_prot_inuse_get(net, &tcpv6_prot)); @@ -42,8 +43,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); - seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues(net), ip6_frag_mem(net)); + seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem); return 0; } @@ -186,7 +186,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) /* can be called either with percpu mib (pcpumib != NULL), * or shared one (smib != NULL) */ -static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, +static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { @@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, } } -static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib, +static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { int i; @@ -215,14 +215,14 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics, + snmp6_seq_show_item64(seq, net->mib.ipv6_statistics, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, + snmp6_seq_show_item(seq, net->mib.icmpv6_statistics, NULL, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, + snmp6_seq_show_item(seq, net->mib.udp_stats_in6, NULL, snmp6_udp6_list); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, + snmp6_seq_show_item(seq, net->mib.udplite_stats_in6, NULL, snmp6_udplite6_list); return 0; } @@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) struct inet6_dev *idev = (struct inet6_dev *)seq->private; seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); - snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6, + snmp6_seq_show_item64(seq, idev->stats.ipv6, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs, snmp6_icmp6_list); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1f29996e368a..39d44226e402 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -176,7 +176,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) goto out; net = dev_net(skb->dev); - sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); + sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb)); while (sk) { int filtered; @@ -220,7 +220,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) } } sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, - IP6CB(skb)->iif); + inet6_iif(skb)); } out: read_unlock(&raw_v6_hashinfo.lock); @@ -375,7 +375,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, net = dev_net(skb->dev); while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, - IP6CB(skb)->iif))) { + inet6_iif(skb)))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); @@ -506,7 +506,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - IP6CB(skb)->iif); + inet6_iif(skb)); *addr_len = sizeof(*sin6); } @@ -588,8 +588,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } offset += skb_transport_offset(skb); - if (skb_copy_bits(skb, offset, &csum, 2)) - BUG(); + BUG_ON(skb_copy_bits(skb, offset, &csum, 2)); /* in case cksum was not initialized */ if (unlikely(csum)) @@ -601,8 +600,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; - if (skb_store_bits(skb, offset, &csum, 2)) - BUG(); + BUG_ON(skb_store_bits(skb, offset, &csum, 2)); send: err = ip6_push_pending_frames(sk); @@ -873,14 +871,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, err = PTR_ERR(dst); goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; @@ -1328,7 +1320,6 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index cc85a9ba5010..c6557d9f7808 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -60,6 +60,8 @@ #include <net/inet_frag.h> #include <net/inet_ecn.h> +static const char ip6_frag_cache_name[] = "ip6-frags"; + struct ip6frag_skb_cb { struct inet6_skb_parm h; @@ -85,27 +87,23 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, const struct in6_addr *daddr) { - u32 c; - net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); - c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), - (__force u32)id, ip6_frags.rnd); - - return c & (INETFRAGS_HASHSZ - 1); + return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, ip6_frags.rnd); } -static unsigned int ip6_hashfn(struct inet_frag_queue *q) +static unsigned int ip6_hashfn(const struct inet_frag_queue *q) { - struct frag_queue *fq; + const struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); } -bool ip6_frag_match(struct inet_frag_queue *q, void *a) +bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) { - struct frag_queue *fq; - struct ip6_create_arg *arg = a; + const struct frag_queue *fq; + const struct ip6_create_arg *arg = a; fq = container_of(q, struct frag_queue, q); return fq->id == arg->id && @@ -115,10 +113,10 @@ bool ip6_frag_match(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_match); -void ip6_frag_init(struct inet_frag_queue *q, void *a) +void ip6_frag_init(struct inet_frag_queue *q, const void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); - struct ip6_create_arg *arg = a; + const struct ip6_create_arg *arg = a; fq->id = arg->id; fq->user = arg->user; @@ -135,7 +133,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, spin_lock(&fq->q.lock); - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto out; inet_frag_kill(&fq->q, frags); @@ -145,17 +143,20 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, if (!dev) goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + if (fq->q.flags & INET_FRAG_EVICTED) + goto out_rcu_unlock; + + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + /* Don't send error if the first segment did not arrive. */ - if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) + if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) goto out_rcu_unlock; - /* - But use as source device on which LAST ARRIVED - segment was received. And do not use fq->dev - pointer directly, device might already disappeared. + /* But use as source device on which LAST ARRIVED + * segment was received. And do not use fq->dev + * pointer directly, device might already disappeared. */ fq->q.fragments->dev = dev; icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); @@ -192,7 +193,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.dst = dst; arg.ecn = ecn; - read_lock(&ip6_frags.lock); hash = inet6_hash_frag(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); @@ -212,7 +212,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net *net = dev_net(skb_dst(skb)->dev); u8 ecn; - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -243,9 +243,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) goto err; - fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. @@ -263,7 +263,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & INET_FRAG_LAST_IN) + if (fq->q.flags & INET_FRAG_LAST_IN) goto err; fq->q.len = end; } @@ -338,10 +338,10 @@ found: */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= INET_FRAG_FIRST_IN; + fq->q.flags |= INET_FRAG_FIRST_IN; } - if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { int res; unsigned long orefdst = skb->_skb_refdst; @@ -353,14 +353,13 @@ found: } skb_dst_drop(skb); - inet_frag_lru_move(&fq->q); return -1; discard_fq: inet_frag_kill(&fq->q, &ip6_frags); err: - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } @@ -523,7 +522,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); - int evicted; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; @@ -552,11 +550,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); - if (evicted) - IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_REASMFAILS, evicted); - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq != NULL) { @@ -576,7 +569,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return -1; fail_hdr: - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } @@ -588,20 +582,25 @@ static const struct inet6_protocol frag_protocol = }; #ifdef CONFIG_SYSCTL +static int zero; + static struct ctl_table ip6_frags_ns_ctl_table[] = { { .procname = "ip6frag_high_thresh", .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.ipv6.frags.low_thresh }, { .procname = "ip6frag_low_thresh", .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.ipv6.frags.high_thresh }, { .procname = "ip6frag_time", @@ -613,10 +612,12 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int ip6_frags_secret_interval_unused; static struct ctl_table ip6_frags_ctl_table[] = { { .procname = "ip6frag_secret_interval", - .data = &ip6_frags.secret_interval, + .data = &ip6_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -636,7 +637,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; + table[0].extra1 = &net->ipv6.frags.low_thresh; + table[0].extra2 = &init_net.ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; + table[1].extra2 = &net->ipv6.frags.high_thresh; table[2].data = &net->ipv6.frags.timeout; /* Don't export sysctls to unprivileged users */ @@ -746,8 +750,10 @@ int __init ipv6_frag_init(void) ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; - ip6_frags.secret_interval = 10 * 60 * HZ; - inet_frags_init(&ip6_frags); + ip6_frags.frags_cache_name = ip6_frag_cache_name; + ret = inet_frags_init(&ip6_frags); + if (ret) + goto err_pernet; out: return ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4011617cca68..f23fbd28a501 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1176,7 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; + fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark); fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1273,6 +1273,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; fl6.daddr = iph->daddr; @@ -1294,6 +1295,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; fl6.daddr = msg->dest; @@ -1453,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; @@ -2232,6 +2234,27 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) fib6_clean_all(net, fib6_remove_prefsrc, &adni); } +#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) + +/* Remove routers and update dst entries when gateway turn into host. */ +static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +{ + struct in6_addr *gateway = (struct in6_addr *)arg; + + if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || + ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && + ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + return -1; + } + return 0; +} + +void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) +{ + fib6_clean_all(net, fib6_clean_tohost, gateway); +} + struct arg_dev_net { struct net_device *dev; struct net *net; @@ -2707,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_MARK]) + fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e5a453ca302e..2e9ba035fb5f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -250,7 +250,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, else strcpy(name, "sit%d"); - dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ipip6_tunnel_setup); if (dev == NULL) return NULL; @@ -560,12 +561,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; @@ -1729,6 +1730,7 @@ static int __net_init sit_init_net(struct net *net) sitn->tunnels[3] = sitn->tunnels_r_l; sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", + NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!sitn->fb_tunnel_dev) { err = -ENOMEM; @@ -1828,4 +1830,5 @@ xfrm_tunnel_failed: module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("sit"); MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb53a5e73c1a..83cea1d39466 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -187,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops); if (!req) goto out; @@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); + ireq->ir_mark = inet_request_mark(sk, skb); + req->expires = 0UL; req->num_retrans = 0; ireq->ecn_ok = ecn_ok; @@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) final_p = fl6_update_dst(&fl6, np->opt, &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 7f405a168822..0c56c93619e0 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -38,6 +38,20 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "auto_flowlabels", + .data = &init_net.ipv6.sysctl.auto_flowlabels, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -67,6 +81,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; + ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; + ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e289830ed6e3..22055b098428 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; + ip6_set_txhash(sk); + /* * TCP over IPv4 */ @@ -340,7 +342,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sock *sk; int err; struct tcp_sock *tp; - __u32 seq; + struct request_sock *fastopen; + __u32 seq, snd_una; struct net *net = dev_net(skb->dev); sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, @@ -371,8 +374,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp = tcp_sk(sk); seq = ntohl(th->seq); + /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ + fastopen = tp->fastopen_rsk; + snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt)) { + !between(seq, snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -436,8 +442,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen. - It can, it SYNs are crossed. --ANK */ + case TCP_SYN_RECV: + /* Only in fast or simultaneous open. If a fast open socket is + * is already accepted it is treated as a connected one below. + */ + if (fastopen && fastopen->sk == NULL) + break; + if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -461,12 +472,14 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, - struct flowi6 *fl6, + struct flowi *fl, struct request_sock *req, - u16 queue_mapping) + u16 queue_mapping, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; @@ -474,7 +487,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, NULL); + skb = tcp_make_synack(sk, dst, req, foc); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, @@ -493,18 +506,6 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) -{ - struct flowi6 fl6; - int res; - - res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); - if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - } - return res; -} static void tcp_v6_reqsk_destructor(struct request_sock *req) { @@ -708,22 +709,66 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } #endif +static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + ireq->ir_iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); + + if (!TCP_SKB_CB(skb)->when && + (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || + np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || + np->rxopt.bits.rxohlim || np->repflow)) { + atomic_inc(&skb->users); + ireq->pktopts = skb; + } +} + +static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict) +{ + if (strict) + *strict = true; + return inet6_csk_route_req(sk, &fl->u.ip6, req); +} + struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_v6_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { + .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - + sizeof(struct ipv6hdr), +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_reqsk_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, -}; #endif + .init_req = tcp_v6_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v6_init_sequence, +#endif + .route_req = tcp_v6_route_req, + .init_seq = tcp_v6_init_sequence, + .send_synack = tcp_v6_send_synack, + .queue_hash_add = inet6_csk_reqsk_queue_hash_add, +}; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, @@ -802,6 +847,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl6.flowi6_oif = inet6_iif(skb); else fl6.flowi6_oif = oif; + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -917,7 +963,12 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ + tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + tcp_rsk(req)->rcv_nxt, req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); @@ -957,142 +1008,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -/* FIXME: this is substantially similar to the ipv4 code. - * Can some kind of merge be done? -- erics - */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct inet_request_sock *ireq; - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; - struct flowi6 fl6; - bool want_cookie = false; - if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) goto drop; - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); - if (!want_cookie) - goto drop; - } + return tcp_conn_request(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, skb); - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } - - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); - if (req == NULL) - goto drop; - -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; -#endif - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, NULL); - - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); - - ireq = inet_rsk(req); - ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; - ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - ireq->ir_iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); - - if (!isn) { - if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || - np->repflow) { - atomic_inc(&skb->users); - ireq->pktopts = skb; - } - - if (want_cookie) { - isn = cookie_v6_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - goto have_isn; - } - - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { - if (!tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", - &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } - - isn = tcp_v6_init_sequence(skb); - } -have_isn: - tcp_rsk(req)->snt_isn = isn; - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; - - if (tcp_v6_send_synack(sk, dst, &fl6, req, - skb_get_queue_mapping(skb)) || - want_cookie) - goto drop_and_free; - - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_rsk(req)->listener = NULL; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ @@ -1208,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; + ip6_set_txhash(newsk); + /* Now IPv6 options... First: no IPv4 options. @@ -1294,25 +1222,6 @@ out: return NULL; } -static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = ~csum_unfold(tcp_v6_check(skb->len, - &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0)); - - if (skb->len <= 76) - return __skb_checksum_complete(skb); - return 0; -} - /* The socket must have it's spinlock held when we get * here. * @@ -1486,7 +1395,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; - if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) + if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; th = tcp_hdr(skb); @@ -1779,6 +1688,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); + struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; @@ -1821,7 +1731,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, - tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh + sp->sk_state == TCP_LISTEN ? + (fastopenq ? fastopenq->max_qlen : 0) : + (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } @@ -1981,7 +1893,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 0d78132ff18a..01b0ff9a0c2c 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -42,7 +42,7 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, if (NAPI_GRO_CB(skb)->flush) goto skip_csum; - wsum = skb->csum; + wsum = NAPI_GRO_CB(skb)->csum; switch (skb->ip_summed) { case CHECKSUM_NONE: @@ -73,7 +73,7 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, &iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; return tcp_gro_complete(skb); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e586d92260e..4836af8f582d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -79,7 +79,6 @@ static unsigned int udp6_ehashfn(struct net *net, int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -95,7 +94,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (addr_type == IPV6_ADDR_ANY && - !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) + !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) return 1; if (sk2_rcv_saddr6 && @@ -473,7 +472,7 @@ try_again: sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - IP6CB(skb)->iif); + inet6_iif(skb)); } *addr_len = sizeof(*sin6); } @@ -534,11 +533,15 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct udphdr *uh = (struct udphdr*)(skb->data+offset); struct sock *sk; int err; + struct net *net = dev_net(skb->dev); - sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest, + sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable); - if (sk == NULL) + if (sk == NULL) { + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; + } if (type == ICMPV6_PKT_TOOBIG) { if (!ip6_sk_accept_pmtu(sk)) @@ -634,6 +637,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { int ret; + /* Verify checksum before giving to encap */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + ret = encap_rcv(sk, skb); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), @@ -670,8 +677,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto csum_error; } - if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) + if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; + } skb_dst_drop(skb); @@ -686,6 +696,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_unlock_sock(sk); return rc; + csum_error: UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: @@ -695,44 +706,26 @@ drop: return -1; } -static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, - __be16 loc_port, const struct in6_addr *loc_addr, - __be16 rmt_port, const struct in6_addr *rmt_addr, - int dif) +static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, + __be16 loc_port, const struct in6_addr *loc_addr, + __be16 rmt_port, const struct in6_addr *rmt_addr, + int dif, unsigned short hnum) { - struct hlist_nulls_node *node; - struct sock *s = sk; - unsigned short num = ntohs(loc_port); - - sk_nulls_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); - - if (!net_eq(sock_net(s), net)) - continue; - - if (udp_sk(s)->udp_port_hash == num && - s->sk_family == PF_INET6) { - if (inet->inet_dport) { - if (inet->inet_dport != rmt_port) - continue; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr) && - !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) - continue; - - if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) - continue; + struct inet_sock *inet = inet_sk(sk); - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) - continue; - } - if (!inet6_mc_check(s, loc_addr, rmt_addr)) - continue; - return s; - } - } - return NULL; + if (!net_eq(sock_net(sk), net)) + return false; + + if (udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6 || + (inet->inet_dport && inet->inet_dport != rmt_port) || + (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || + (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + return false; + if (!inet6_mc_check(sk, loc_addr, rmt_addr)) + return false; + return true; } static void flush_stack(struct sock **stack, unsigned int count, @@ -756,10 +749,22 @@ static void flush_stack(struct sock **stack, unsigned int count, if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) skb1 = NULL; + sock_put(sk); } if (unlikely(skb1)) kfree_skb(skb1); } + +static void udp6_csum_zero_error(struct sk_buff *skb) +{ + /* RFC 2460 section 8.1 says that we SHOULD log + * this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); +} + /* * Note: called only from the BH handler context, * so we don't need to lock the hashes. @@ -770,38 +775,51 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, { struct sock *sk, *stack[256 / sizeof(struct sock *)]; const struct udphdr *uh = udp_hdr(skb); - struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); - int dif; - unsigned int i, count = 0; + struct hlist_nulls_node *node; + unsigned short hnum = ntohs(uh->dest); + struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); + int dif = inet6_iif(skb); + unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); + unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); + + if (use_hash2) { + hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & + udp_table.mask; + hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask; +start_lookup: + hslot = &udp_table.hash2[hash2]; + offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); + } spin_lock(&hslot->lock); - sk = sk_nulls_head(&hslot->head); - dif = inet6_iif(skb); - sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); - while (sk) { - stack[count++] = sk; - sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, - uh->source, saddr, dif); - if (unlikely(count == ARRAY_SIZE(stack))) { - if (!sk) - break; - flush_stack(stack, count, skb, ~0); - count = 0; + sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) { + if (__udp_v6_is_mcast_sock(net, sk, + uh->dest, daddr, + uh->source, saddr, + dif, hnum) && + /* If zero checksum and no_check is not on for + * the socket then skip it. + */ + (uh->check || udp_sk(sk)->no_check6_rx)) { + if (unlikely(count == ARRAY_SIZE(stack))) { + flush_stack(stack, count, skb, ~0); + count = 0; + } + stack[count++] = sk; + sock_hold(sk); } } - /* - * before releasing the lock, we must take reference on sockets - */ - for (i = 0; i < count; i++) - sock_hold(stack[i]); spin_unlock(&hslot->lock); + /* Also lookup *:port if we are using hash2 and haven't done so yet. */ + if (use_hash2 && hash2 != hash2_any) { + hash2 = hash2_any; + goto start_lookup; + } + if (count) { flush_stack(stack, count, skb, count - 1); - - for (i = 0; i < count; i++) - sock_put(stack[i]); } else { kfree_skb(skb); } @@ -867,6 +885,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; + if (!uh->check && !udp_sk(sk)->no_check6_rx) { + sock_put(sk); + udp6_csum_zero_error(skb); + goto csum_error; + } + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); @@ -879,6 +903,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; } + if (!uh->check) { + udp6_csum_zero_error(skb); + goto csum_error; + } + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; @@ -1006,7 +1035,10 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (is_udplite) csum = udplite_csum_outgoing(sk, skb); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ + else if (up->no_check6_tx) { /* UDP csum disabled */ + skb->ip_summed = CHECKSUM_NONE; + goto send; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, up->len); goto send; @@ -1232,14 +1264,8 @@ do_udp_sendmsg: goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; @@ -1479,7 +1505,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b261ee8b83fc..0ae3d98f83e0 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -63,7 +63,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS) || @@ -76,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, goto out; } - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + if (skb->encapsulation && skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features); else { /* Do software UFO. Complete and fill in the UDP checksum as HW cannot diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index dfcc4be46898..9cf097e206e9 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 19ef329bdbf8..433672d07d0b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (!skb->local_df && skb->len > mtu) { + if (!skb->ignore_df && skb->len > mtu) { skb->dev = dst->dev; if (xfrm6_local_dontfrag(skb)) @@ -114,13 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif - - skb->protocol = htons(ETH_P_IPV6); - skb->local_df = 1; + skb->ignore_df = 1; return x->outer_mode->output2(x, skb); } @@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output); int xfrm6_output_finish(struct sk_buff *skb) { + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + skb->protocol = htons(ETH_P_IPV6); + #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; #endif - skb->protocol = htons(ETH_P_IPV6); return xfrm_output(skb); } @@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int mtu; +#ifdef CONFIG_NETFILTER + if (!x) { + IP6CB(skb)->flags |= IP6SKB_REROUTED; + return dst_output(skb); + } +#endif + if (skb->protocol == htons(ETH_P_IPV6)) mtu = ip6_skb_dst_mtu(skb); else @@ -150,7 +153,7 @@ static int __xfrm6_output(struct sk_buff *skb) if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; - } else if (!skb->local_df && skb->len > mtu && skb->sk) { + } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { xfrm_local_error(skb, mtu); return -EMSGSIZE; } @@ -165,6 +168,7 @@ static int __xfrm6_output(struct sk_buff *skb) int xfrm6_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, - skb_dst(skb)->dev, __xfrm6_output); + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, + NULL, skb_dst(skb)->dev, __xfrm6_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 6ab989c486f7..54d13f8dbbae 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -50,6 +50,10 @@ int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm6_protocol *handler; + struct xfrm6_protocol __rcu **head = proto_handlers(protocol); + + if (!head) + return 0; for_each_protocol_rcu(*proto_handlers(protocol), handler) if ((ret = handler->cb_handler(skb, err)) <= 0) @@ -184,10 +188,12 @@ int xfrm6_protocol_register(struct xfrm6_protocol *handler, struct xfrm6_protocol __rcu **pprev; struct xfrm6_protocol *t; bool add_netproto = false; - int ret = -EEXIST; int priority = handler->priority; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm6_protocol_mutex); if (!rcu_dereference_protected(*proto_handlers(protocol), @@ -230,6 +236,9 @@ int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, struct xfrm6_protocol *t; int ret = -ENOENT; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm6_protocol_mutex); for (pprev = proto_handlers(protocol); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 41e4e93cb3aa..91729b807c7d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1353,7 +1353,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol, sk_refcnt_debug_inc(sk); sock_init_data(sock, sk); - sk->sk_no_check = 1; /* Checksum off by default */ + sk->sk_no_check_tx = 1; /* Checksum off by default */ sock->ops = &ipx_dgram_ops; rc = 0; out: diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index c1f03185c5e1..67e7ad3d46b1 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -236,7 +236,8 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, } /* Apply checksum. Not allowed on 802.3 links. */ - if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023)) + if (sk->sk_no_check_tx || + intrfc->if_dlink_type == htons(IPX_FRAME_8023)) ipx->ipx_checksum = htons(0xFFFF); else ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr)); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 54747c25c86c..92fafd485deb 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -674,7 +674,6 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name) self->daddr = DEV_ADDR_ANY; kfree(discoveries); return -EHOSTUNREACH; - break; } } /* Cleanup our copy of the discovery log */ diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 2ba8b9705bb7..61ceb4cdb4a2 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -320,8 +320,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, __FILE__, __LINE__, tty->driver->name, port->count); spin_lock_irqsave(&port->lock, flags); - if (!tty_hung_up_p(filp)) - port->count--; + port->count--; port->blocked_open++; spin_unlock_irqrestore(&port->lock, flags); @@ -458,8 +457,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) /* * If the port is the middle of closing, bail out now */ - if (tty_hung_up_p(filp) || - test_bit(ASYNCB_CLOSING, &self->port.flags)) { + if (test_bit(ASYNCB_CLOSING, &self->port.flags)) { /* Hm, why are we blocking on ASYNC_CLOSING if we * do return -EAGAIN/-ERESTARTSYS below anyway? diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 365b895da84b..9e0d909390fd 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -293,7 +293,8 @@ static void irda_device_setup(struct net_device *dev) */ struct net_device *alloc_irdadev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "irda%d", irda_device_setup); + return alloc_netdev(sizeof_priv, "irda%d", NET_NAME_UNKNOWN, + irda_device_setup); } EXPORT_SYMBOL(alloc_irdadev); diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 7ac4d1becbfc..1bc49edf2296 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1024,7 +1024,6 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type, default: IRDA_DEBUG(2, "%s(), Unknown parameter type!\n", __func__ ); return 0; - break; } /* Insert at end of sk-buffer */ diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index ffcec225b5d9..dc13f1a45f2f 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -96,7 +96,7 @@ static void irlan_eth_setup(struct net_device *dev) */ struct net_device *alloc_irlandev(const char *name) { - return alloc_netdev(sizeof(struct irlan_cb), name, + return alloc_netdev(sizeof(struct irlan_cb), name, NET_NAME_UNKNOWN, irlan_eth_setup); } diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 98ad6ec4bd3c..a5f28d421ea8 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1426,7 +1426,8 @@ __u8 *irlmp_hint_to_service(__u8 *hint) if (hint[1] & HINT_TELEPHONY) { IRDA_DEBUG(1, "Telephony "); service[i++] = S_TELEPHONY; - } if (hint[1] & HINT_FILE_SERVER) + } + if (hint[1] & HINT_FILE_SERVER) IRDA_DEBUG(1, "File Server "); if (hint[1] & HINT_COMM) { diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 01e77b0ae075..a089b6b91650 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -682,6 +682,18 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) return NULL; } +static void __iucv_auto_name(struct iucv_sock *iucv) +{ + char name[12]; + + sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name)); + while (__iucv_get_sock_by_name(name)) { + sprintf(name, "%08x", + atomic_inc_return(&iucv_sk_list.autobind_name)); + } + memcpy(iucv->src_name, name, 8); +} + /* Bind an unbound socket */ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) @@ -724,8 +736,12 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if (!memcmp(dev->perm_addr, uid, 8)) { - memcpy(iucv->src_name, sa->siucv_name, 8); memcpy(iucv->src_user_id, sa->siucv_user_id, 8); + /* Check for unitialized siucv_name */ + if (strncmp(sa->siucv_name, " ", 8) == 0) + __iucv_auto_name(iucv); + else + memcpy(iucv->src_name, sa->siucv_name, 8); sk->sk_bound_dev_if = dev->ifindex; iucv->hs_dev = dev; dev_hold(dev); @@ -763,7 +779,6 @@ done: static int iucv_sock_autobind(struct sock *sk) { struct iucv_sock *iucv = iucv_sk(sk); - char name[12]; int err = 0; if (unlikely(!pr_iucv)) @@ -772,17 +787,9 @@ static int iucv_sock_autobind(struct sock *sk) memcpy(iucv->src_user_id, iucv_userid, 8); write_lock_bh(&iucv_sk_list.lock); - - sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name)); - while (__iucv_get_sock_by_name(name)) { - sprintf(name, "%08x", - atomic_inc_return(&iucv_sk_list.autobind_name)); - } - + __iucv_auto_name(iucv); write_unlock_bh(&iucv_sk_list.lock); - memcpy(&iucv->src_name, name, 8); - if (!iucv->msglimit) iucv->msglimit = IUCV_QUEUELEN_DEFAULT; @@ -1096,7 +1103,6 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, default: err = -EINVAL; goto out; - break; } } @@ -1536,7 +1542,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how) sk->sk_shutdown |= how; if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) { - if (iucv->transport == AF_IUCV_TRANS_IUCV) { + if ((iucv->transport == AF_IUCV_TRANS_IUCV) && + iucv->path) { err = pr_iucv->path_quiesce(iucv->path, NULL); if (err) err = -ENOTCONN; @@ -1830,7 +1837,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { + if (msg->tag == IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } @@ -1936,11 +1943,10 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) sk_acceptq_is_full(sk) || !nsk) { /* error on server socket - connection refused */ - if (nsk) - sk_free(nsk); afiucv_swap_src_dest(skb); trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN; err = dev_queue_xmit(skb); + iucv_sock_kill(nsk); bh_unlock_sock(sk); goto out; } diff --git a/net/key/af_key.c b/net/key/af_key.c index f3c83073afc4..1847ec4e3930 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -405,7 +405,6 @@ static int verify_address_len(const void *p) * XXX When it can, remove this -EINVAL. -DaveM */ return -EINVAL; - break; } return 0; @@ -536,7 +535,6 @@ pfkey_satype2proto(uint8_t satype) return IPPROTO_ESP; case SADB_X_SATYPE_IPCOMP: return IPPROTO_COMP; - break; default: return 0; } @@ -553,7 +551,6 @@ pfkey_proto2satype(uint16_t proto) return SADB_SATYPE_ESP; case IPPROTO_COMP: return SADB_X_SATYPE_IPCOMP; - break; default: return 0; } @@ -1476,9 +1473,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg else err = xfrm_state_update(x); - xfrm_audit_state_add(x, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -1532,9 +1527,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: - xfrm_audit_state_delete(x, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; @@ -1726,17 +1719,13 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m struct net *net = sock_net(sk); unsigned int proto; struct km_event c; - struct xfrm_audit audit_info; int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = 0; - err = xfrm_state_flush(net, proto, &audit_info); + err = xfrm_state_flush(net, proto, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ @@ -2288,9 +2277,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); - xfrm_audit_policy_add(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) goto out; @@ -2372,9 +2359,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (xp == NULL) return -ENOENT; - xfrm_audit_policy_delete(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; @@ -2553,7 +2538,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, sel.sport_mask = htons(0xffff); /* set destination address info of selector */ - sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1], + sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2622,9 +2607,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ return -ENOENT; if (delete) { - xfrm_audit_policy_delete(xp, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; @@ -2733,13 +2716,9 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad { struct net *net = sock_net(sk); struct km_event c; - struct xfrm_audit audit_info; int err, err2; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = 0; - err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index adb9843dd7cf..378c73b26093 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -6,6 +6,7 @@ menuconfig L2TP tristate "Layer Two Tunneling Protocol (L2TP)" depends on (IPV6 || IPV6=n) depends on INET + select NET_UDP_TUNNEL ---help--- Layer Two Tunneling Protocol diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a4e37d7158dc..1109d3bb8dac 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -52,6 +52,7 @@ #include <net/dst.h> #include <net/ip.h> #include <net/udp.h> +#include <net/udp_tunnel.h> #include <net/inet_common.h> #include <net/xfrm.h> #include <net/protocol.h> @@ -495,52 +496,6 @@ out: spin_unlock_bh(&session->reorder_q.lock); } -static inline int l2tp_verify_udp_checksum(struct sock *sk, - struct sk_buff *skb) -{ - struct udphdr *uh = udp_hdr(skb); - u16 ulen = ntohs(uh->len); - __wsum psum; - - if (sk->sk_no_check || skb_csum_unnecessary(skb)) - return 0; - -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { - if (!uh->check) { - LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); - return 1; - } - if ((skb->ip_summed == CHECKSUM_COMPLETE) && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, ulen, - IPPROTO_UDP, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, IPPROTO_UDP, - 0)); - } else -#endif - { - struct inet_sock *inet; - if (!uh->check) - return 0; - inet = inet_sk(sk); - psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, - ulen, IPPROTO_UDP, 0); - - if ((skb->ip_summed == CHECKSUM_COMPLETE) && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - skb->csum = psum; - } - - return __skb_checksum_complete(skb); -} - static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr) { u32 nws; @@ -895,8 +850,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u16 version; int length; - if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) - goto discard_bad_csum; + /* UDP has verifed checksum */ /* UDP always verifies the packet length. */ __skb_pull(skb, sizeof(struct udphdr)); @@ -979,14 +933,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, return 0; -discard_bad_csum: - LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); - UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - atomic_long_inc(&tunnel->stats.rx_errors); - kfree_skb(skb); - - return 0; - error: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); @@ -1128,7 +1074,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, } /* Queue the packet to IP for output */ - skb->local_df = 1; + skb->ignore_df = 1; #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(tunnel->sock, skb, NULL); @@ -1150,31 +1096,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, return 0; } -#if IS_ENABLED(CONFIG_IPV6) -static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, - int udp_len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct udphdr *uh = udp_hdr(skb); - - if (!skb_dst(skb) || !skb_dst(skb)->dev || - !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - __wsum csum = skb_checksum(skb, 0, udp_len, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, - IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, - udp_len, IPPROTO_UDP, 0); - } -} -#endif - /* If caller requires the skb to have a ppp header, the header must be * inserted in the skb data before calling this function. */ @@ -1186,7 +1107,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len struct flowi *fl; struct udphdr *uh; struct inet_sock *inet; - __wsum csum; int headroom; int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; int udp_len; @@ -1235,33 +1155,17 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len uh->dest = inet->inet_dport; udp_len = uhlen + hdr_len + data_len; uh->len = htons(udp_len); - uh->check = 0; /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) - l2tp_xmit_ipv6_csum(sk, skb, udp_len); + udp6_set_csum(udp_get_no_check6_tx(sk), + skb, &inet6_sk(sk)->saddr, + &sk->sk_v6_daddr, udp_len); else #endif - if (sk->sk_no_check == UDP_CSUM_NOXMIT) - skb->ip_summed = CHECKSUM_NONE; - else if ((skb_dst(skb) && skb_dst(skb)->dev) && - (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { - skb->ip_summed = CHECKSUM_COMPLETE; - csum = skb_checksum(skb, 0, udp_len, 0); - uh->check = csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } else { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(inet->inet_saddr, - inet->inet_daddr, - udp_len, IPPROTO_UDP, 0); - } + udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr, + inet->inet_daddr, udp_len); break; case L2TP_ENCAPTYPE_IP: @@ -1455,76 +1359,46 @@ static int l2tp_tunnel_sock_create(struct net *net, { int err = -EINVAL; struct socket *sock = NULL; - struct sockaddr_in udp_addr = {0}; - struct sockaddr_l2tpip ip_addr = {0}; -#if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 udp6_addr = {0}; - struct sockaddr_l2tpip6 ip6_addr = {0}; -#endif + struct udp_port_cfg udp_conf; switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: + memset(&udp_conf, 0, sizeof(udp_conf)); + #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); - if (err < 0) - goto out; - - sk_change_net(sock->sk, net); - - udp6_addr.sin6_family = AF_INET6; - memcpy(&udp6_addr.sin6_addr, cfg->local_ip6, - sizeof(udp6_addr.sin6_addr)); - udp6_addr.sin6_port = htons(cfg->local_udp_port); - err = kernel_bind(sock, (struct sockaddr *) &udp6_addr, - sizeof(udp6_addr)); - if (err < 0) - goto out; - - udp6_addr.sin6_family = AF_INET6; - memcpy(&udp6_addr.sin6_addr, cfg->peer_ip6, - sizeof(udp6_addr.sin6_addr)); - udp6_addr.sin6_port = htons(cfg->peer_udp_port); - err = kernel_connect(sock, - (struct sockaddr *) &udp6_addr, - sizeof(udp6_addr), 0); - if (err < 0) - goto out; + udp_conf.family = AF_INET6; + memcpy(&udp_conf.local_ip6, cfg->local_ip6, + sizeof(udp_conf.local_ip6)); + memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, + sizeof(udp_conf.peer_ip6)); + udp_conf.use_udp6_tx_checksums = + cfg->udp6_zero_tx_checksums; + udp_conf.use_udp6_rx_checksums = + cfg->udp6_zero_rx_checksums; } else #endif { - err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); - if (err < 0) - goto out; - - sk_change_net(sock->sk, net); - - udp_addr.sin_family = AF_INET; - udp_addr.sin_addr = cfg->local_ip; - udp_addr.sin_port = htons(cfg->local_udp_port); - err = kernel_bind(sock, (struct sockaddr *) &udp_addr, - sizeof(udp_addr)); - if (err < 0) - goto out; - - udp_addr.sin_family = AF_INET; - udp_addr.sin_addr = cfg->peer_ip; - udp_addr.sin_port = htons(cfg->peer_udp_port); - err = kernel_connect(sock, - (struct sockaddr *) &udp_addr, - sizeof(udp_addr), 0); - if (err < 0) - goto out; + udp_conf.family = AF_INET; + udp_conf.local_ip = cfg->local_ip; + udp_conf.peer_ip = cfg->peer_ip; + udp_conf.use_udp_checksums = cfg->use_udp_checksums; } - if (!cfg->use_udp_checksums) - sock->sk->sk_no_check = UDP_CSUM_NOXMIT; + udp_conf.local_udp_port = htons(cfg->local_udp_port); + udp_conf.peer_udp_port = htons(cfg->peer_udp_port); + + err = udp_sock_create(net, &udp_conf, &sock); + if (err < 0) + goto out; break; case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { + struct sockaddr_l2tpip6 ip6_addr = {0}; + err = sock_create_kern(AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) @@ -1553,6 +1427,8 @@ static int l2tp_tunnel_sock_create(struct net *net, } else #endif { + struct sockaddr_l2tpip ip_addr = {0}; + err = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 3f93ccd6ba97..68aa9ffd4ae4 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -162,7 +162,9 @@ struct l2tp_tunnel_cfg { #endif u16 local_udp_port; u16 peer_udp_port; - unsigned int use_udp_checksums:1; + unsigned int use_udp_checksums:1, + udp6_zero_tx_checksums:1, + udp6_zero_rx_checksums:1; }; struct l2tp_tunnel { diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 76125c57ee6d..edb78e69efe4 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -246,7 +246,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - dev = alloc_netdev(sizeof(*priv), name, l2tp_eth_dev_setup); + dev = alloc_netdev(sizeof(*priv), name, NET_NAME_UNKNOWN, + l2tp_eth_dev_setup); if (!dev) { rc = -ENOMEM; goto out_del_session; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 3397fe6897c0..369a9822488c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -606,7 +606,6 @@ static struct inet_protosw l2tp_ip_protosw = { .protocol = IPPROTO_L2TP, .prot = &l2tp_ip_prot, .ops = &l2tp_ip_ops, - .no_check = 0, }; static struct net_protocol l2tp_ip_protocol __read_mostly = { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 7704ea9502fd..0edb263cc002 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -605,14 +605,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; @@ -693,7 +687,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, lsa->l2tp_scope_id = 0; lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) - lsa->l2tp_scope_id = IP6CB(skb)->iif; + lsa->l2tp_scope_id = inet6_iif(skb); } if (np->rxopt.all) @@ -761,7 +755,6 @@ static struct inet_protosw l2tp_ip6_protosw = { .protocol = IPPROTO_L2TP, .prot = &l2tp_ip6_prot, .ops = &l2tp_ip6_ops, - .no_check = 0, }; static struct inet6_protocol l2tp_ip6_protocol __read_mostly = { diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bd7387adea9e..0ac907adb2f4 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -161,6 +161,13 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); if (info->attrs[L2TP_ATTR_UDP_CSUM]) cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]); + +#if IS_ENABLED(CONFIG_IPV6) + if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]) + cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); + if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]) + cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); +#endif } if (info->attrs[L2TP_ATTR_DEBUG]) @@ -297,8 +304,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla case L2TP_ENCAPTYPE_UDP: if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) || - nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, - (sk->sk_no_check != UDP_CSUM_NOXMIT))) + nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) goto nla_put_failure; /* NOBREAK */ case L2TP_ENCAPTYPE_IP: diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 950909f04ee6..13752d96275e 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1365,7 +1365,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, int err; if (level != SOL_PPPOL2TP) - return udp_prot.setsockopt(sk, level, optname, optval, optlen); + return -EINVAL; if (optlen < sizeof(int)) return -EINVAL; @@ -1491,7 +1491,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, struct pppol2tp_session *ps; if (level != SOL_PPPOL2TP) - return udp_prot.getsockopt(sk, level, optname, optval, optlen); + return -EINVAL; if (get_user(len, optlen)) return -EFAULT; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 97b5dcad5025..aeb6a483b3bc 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -19,14 +19,6 @@ if MAC80211 != n config MAC80211_HAS_RC bool -config MAC80211_RC_PID - bool "PID controller based rate control algorithm" if EXPERT - select MAC80211_HAS_RC - ---help--- - This option enables a TX rate control algorithm for - mac80211 that uses a PID controller to select the TX - rate. - config MAC80211_RC_MINSTREL bool "Minstrel" if EXPERT select MAC80211_HAS_RC @@ -51,14 +43,6 @@ choice overridden through the ieee80211_default_rc_algo module parameter if different algorithms are available. -config MAC80211_RC_DEFAULT_PID - bool "PID controller based rate control algorithm" - depends on MAC80211_RC_PID - ---help--- - Select the PID controller based rate control as the - default rate control algorithm. You should choose - this unless you know what you are doing. - config MAC80211_RC_DEFAULT_MINSTREL bool "Minstrel" depends on MAC80211_RC_MINSTREL @@ -72,7 +56,6 @@ config MAC80211_RC_DEFAULT string default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL && MAC80211_RC_MINSTREL_HT default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL - default "pid" if MAC80211_RC_DEFAULT_PID default "" endif diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 9d7d840aac6d..7273d2796dd1 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -17,6 +17,7 @@ mac80211-y := \ aes_ccm.o \ aes_cmac.o \ cfg.o \ + ethtool.o \ rx.o \ spectmgmt.o \ tx.o \ @@ -25,7 +26,8 @@ mac80211-y := \ wme.o \ event.o \ chan.o \ - trace.o mlme.o + trace.o mlme.o \ + tdls.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ @@ -46,17 +48,12 @@ mac80211-$(CONFIG_PM) += pm.o CFLAGS_trace.o := -I$(src) -# objects for PID algorithm -rc80211_pid-y := rc80211_pid_algo.o -rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o - rc80211_minstrel-y := rc80211_minstrel.o rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o rc80211_minstrel_ht-y := rc80211_minstrel_ht.o rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o -mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y) diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 7c7df475a401..ec24378caaaf 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -23,12 +23,13 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist assoc, pt, ct[2]; - struct { - struct aead_request req; - u8 priv[crypto_aead_reqsize(tfm)]; - } aead_req; - memset(&aead_req, 0, sizeof(aead_req)); + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *) aead_req_data; + + memset(aead_req, 0, sizeof(aead_req_data)); sg_init_one(&pt, data, data_len); sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); @@ -36,23 +37,23 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_set_buf(&ct[0], data, data_len); sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); - aead_request_set_tfm(&aead_req.req, tfm); - aead_request_set_assoc(&aead_req.req, &assoc, assoc.length); - aead_request_set_crypt(&aead_req.req, &pt, ct, data_len, b_0); + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, &pt, ct, data_len, b_0); - crypto_aead_encrypt(&aead_req.req); + crypto_aead_encrypt(aead_req); } int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist assoc, pt, ct[2]; - struct { - struct aead_request req; - u8 priv[crypto_aead_reqsize(tfm)]; - } aead_req; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *) aead_req_data; - memset(&aead_req, 0, sizeof(aead_req)); + memset(aead_req, 0, sizeof(aead_req_data)); sg_init_one(&pt, data, data_len); sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); @@ -60,12 +61,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_set_buf(&ct[0], data, data_len); sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); - aead_request_set_tfm(&aead_req.req, tfm); - aead_request_set_assoc(&aead_req.req, &assoc, assoc.length); - aead_request_set_crypt(&aead_req.req, ct, &pt, + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, ct, &pt, data_len + IEEE80211_CCMP_MIC_LEN, b_0); - return crypto_aead_decrypt(&aead_req.req); + return crypto_aead_decrypt(aead_req); } struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 31bf2586fb84..f0e84bc48038 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -52,7 +52,7 @@ static void ieee80211_free_tid_rx(struct rcu_head *h) del_timer_sync(&tid_rx->reorder_timer); for (i = 0; i < tid_rx->buf_size; i++) - dev_kfree_skb(tid_rx->reorder_buf[i]); + __skb_queue_purge(&tid_rx->reorder_buf[i]); kfree(tid_rx->reorder_buf); kfree(tid_rx->reorder_time); kfree(tid_rx); @@ -224,28 +224,15 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d ieee80211_tx_skb(sdata, skb); } -void ieee80211_process_addba_request(struct ieee80211_local *local, - struct sta_info *sta, - struct ieee80211_mgmt *mgmt, - size_t len) +void __ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx) { + struct ieee80211_local *local = sta->sdata->local; struct tid_ampdu_rx *tid_agg_rx; - u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; - u8 dialog_token; - int ret = -EOPNOTSUPP; - - /* extract session parameters from addba request frame */ - dialog_token = mgmt->u.action.u.addba_req.dialog_token; - timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); - start_seq_num = - le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; - - capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); - ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; - tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; - - status = WLAN_STATUS_REQUEST_DECLINED; + int i, ret = -EOPNOTSUPP; + u16 status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sta->sdata, @@ -264,7 +251,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_INVALID_QOS_PARAM; ht_dbg_ratelimited(sta->sdata, "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n", - mgmt->sa, tid, ba_policy, buf_size); + sta->sta.addr, tid, ba_policy, buf_size); goto end_no_lock; } /* determine default buffer size */ @@ -281,7 +268,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, if (sta->ampdu_mlme.tid_rx[tid]) { ht_dbg_ratelimited(sta->sdata, "unexpected AddBA Req from %pM on tid %u\n", - mgmt->sa, tid); + sta->sta.addr, tid); /* delete existing Rx BA session on the same tid */ ___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, @@ -308,7 +295,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* prepare reordering buffer */ tid_agg_rx->reorder_buf = - kcalloc(buf_size, sizeof(struct sk_buff *), GFP_KERNEL); + kcalloc(buf_size, sizeof(struct sk_buff_head), GFP_KERNEL); tid_agg_rx->reorder_time = kcalloc(buf_size, sizeof(unsigned long), GFP_KERNEL); if (!tid_agg_rx->reorder_buf || !tid_agg_rx->reorder_time) { @@ -318,6 +305,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, goto end; } + for (i = 0; i < buf_size; i++) + __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]); + ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", @@ -350,6 +340,74 @@ end: mutex_unlock(&sta->ampdu_mlme.mtx); end_no_lock: - ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, - dialog_token, status, 1, buf_size, timeout); + if (tx) + ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, + dialog_token, status, 1, buf_size, + timeout); +} + +void ieee80211_process_addba_request(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; + u8 dialog_token; + + /* extract session parameters from addba request frame */ + dialog_token = mgmt->u.action.u.addba_req.dialog_token; + timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); + start_seq_num = + le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; + + capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; + tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + + __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, + start_seq_num, ba_policy, tid, + buf_size, true); +} + +void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, + const u8 *addr, u16 tid) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_rx_agg *rx_agg; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) + return; + + rx_agg = (struct ieee80211_rx_agg *) &skb->cb; + memcpy(&rx_agg->addr, addr, ETH_ALEN); + rx_agg->tid = tid; + + skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_START; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &sdata->work); +} +EXPORT_SYMBOL(ieee80211_start_rx_ba_session_offl); + +void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, + const u8 *addr, u16 tid) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_rx_agg *rx_agg; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) + return; + + rx_agg = (struct ieee80211_rx_agg *) &skb->cb; + memcpy(&rx_agg->addr, addr, ETH_ALEN); + rx_agg->tid = tid; + + skb->pkt_type = IEEE80211_SDATA_QUEUE_RX_AGG_STOP; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &sdata->work); } +EXPORT_SYMBOL(ieee80211_stop_rx_ba_session_offl); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index ce9633a3cfb0..d6986f3aa5c4 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -170,10 +170,13 @@ ieee80211_stop_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) { int queue = sdata->vif.hw_queue[ieee80211_ac_from_tid(tid)]; + /* we do refcounting here, so don't use the queue reason refcounting */ + if (atomic_inc_return(&sdata->local->agg_queue_stop[queue]) == 1) ieee80211_stop_queue_by_reason( &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + IEEE80211_QUEUE_STOP_REASON_AGGREGATION, + false); __acquire(agg_queue); } @@ -185,7 +188,8 @@ ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) if (atomic_dec_return(&sdata->local->agg_queue_stop[queue]) == 0) ieee80211_wake_queue_by_reason( &sdata->local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + IEEE80211_QUEUE_STOP_REASON_AGGREGATION, + false); __release(agg_queue); } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index aaa59d719592..927b4ea0128b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -109,6 +109,15 @@ static int ieee80211_change_iface(struct wiphy *wiphy, static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + mutex_lock(&sdata->local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&sdata->local->chanctx_mtx); + if (ret < 0) + return ret; + return ieee80211_do_open(wdev, true); } @@ -459,316 +468,8 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; } -static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) -{ - struct ieee80211_sub_if_data *sdata = sta->sdata; - struct ieee80211_local *local = sdata->local; - struct timespec uptime; - u64 packets = 0; - int i, ac; - - sinfo->generation = sdata->local->sta_generation; - - sinfo->filled = STATION_INFO_INACTIVE_TIME | - STATION_INFO_RX_BYTES64 | - STATION_INFO_TX_BYTES64 | - STATION_INFO_RX_PACKETS | - STATION_INFO_TX_PACKETS | - STATION_INFO_TX_RETRIES | - STATION_INFO_TX_FAILED | - STATION_INFO_TX_BITRATE | - STATION_INFO_RX_BITRATE | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME | - STATION_INFO_STA_FLAGS | - STATION_INFO_BEACON_LOSS_COUNT; - - do_posix_clock_monotonic_gettime(&uptime); - sinfo->connected_time = uptime.tv_sec - sta->last_connected; - - sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); - sinfo->tx_bytes = 0; - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - sinfo->tx_bytes += sta->tx_bytes[ac]; - packets += sta->tx_packets[ac]; - } - sinfo->tx_packets = packets; - sinfo->rx_bytes = sta->rx_bytes; - sinfo->rx_packets = sta->rx_packets; - sinfo->tx_retries = sta->tx_retry_count; - sinfo->tx_failed = sta->tx_retry_failed; - sinfo->rx_dropped_misc = sta->rx_dropped; - sinfo->beacon_loss_count = sta->beacon_loss_count; - - if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || - (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; - if (!local->ops->get_rssi || - drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal)) - sinfo->signal = (s8)sta->last_signal; - sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); - } - if (sta->chains) { - sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | - STATION_INFO_CHAIN_SIGNAL_AVG; - - sinfo->chains = sta->chains; - for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { - sinfo->chain_signal[i] = sta->chain_signal_last[i]; - sinfo->chain_signal_avg[i] = - (s8) -ewma_read(&sta->chain_signal_avg[i]); - } - } - - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); - sta_set_rate_info_rx(sta, &sinfo->rxrate); - - if (ieee80211_vif_is_mesh(&sdata->vif)) { -#ifdef CONFIG_MAC80211_MESH - sinfo->filled |= STATION_INFO_LLID | - STATION_INFO_PLID | - STATION_INFO_PLINK_STATE | - STATION_INFO_LOCAL_PM | - STATION_INFO_PEER_PM | - STATION_INFO_NONPEER_PM; - - sinfo->llid = sta->llid; - sinfo->plid = sta->plid; - sinfo->plink_state = sta->plink_state; - if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - sinfo->filled |= STATION_INFO_T_OFFSET; - sinfo->t_offset = sta->t_offset; - } - sinfo->local_pm = sta->local_pm; - sinfo->peer_pm = sta->peer_pm; - sinfo->nonpeer_pm = sta->nonpeer_pm; -#endif - } - - sinfo->bss_param.flags = 0; - if (sdata->vif.bss_conf.use_cts_prot) - sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT; - if (sdata->vif.bss_conf.use_short_preamble) - sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; - if (sdata->vif.bss_conf.use_short_slot) - sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; - sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period; - sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; - - sinfo->sta_flags.set = 0; - sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | - BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | - BIT(NL80211_STA_FLAG_WME) | - BIT(NL80211_STA_FLAG_MFP) | - BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED) | - BIT(NL80211_STA_FLAG_TDLS_PEER); - if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); - if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE)) - sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE); - if (test_sta_flag(sta, WLAN_STA_WME)) - sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME); - if (test_sta_flag(sta, WLAN_STA_MFP)) - sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); - if (test_sta_flag(sta, WLAN_STA_AUTH)) - sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); - if (test_sta_flag(sta, WLAN_STA_ASSOC)) - sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) - sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); -} - -static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { - "rx_packets", "rx_bytes", "wep_weak_iv_count", - "rx_duplicates", "rx_fragments", "rx_dropped", - "tx_packets", "tx_bytes", "tx_fragments", - "tx_filtered", "tx_retry_failed", "tx_retries", - "beacon_loss", "sta_state", "txrate", "rxrate", "signal", - "channel", "noise", "ch_time", "ch_time_busy", - "ch_time_ext_busy", "ch_time_rx", "ch_time_tx" -}; -#define STA_STATS_LEN ARRAY_SIZE(ieee80211_gstrings_sta_stats) - -static int ieee80211_get_et_sset_count(struct wiphy *wiphy, - struct net_device *dev, - int sset) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int rv = 0; - - if (sset == ETH_SS_STATS) - rv += STA_STATS_LEN; - - rv += drv_get_et_sset_count(sdata, sset); - - if (rv == 0) - return -EOPNOTSUPP; - return rv; -} - -static void ieee80211_get_et_stats(struct wiphy *wiphy, - struct net_device *dev, - struct ethtool_stats *stats, - u64 *data) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_channel *channel; - struct sta_info *sta; - struct ieee80211_local *local = sdata->local; - struct station_info sinfo; - struct survey_info survey; - int i, q; -#define STA_STATS_SURVEY_LEN 7 - - memset(data, 0, sizeof(u64) * STA_STATS_LEN); - -#define ADD_STA_STATS(sta) \ - do { \ - data[i++] += sta->rx_packets; \ - data[i++] += sta->rx_bytes; \ - data[i++] += sta->wep_weak_iv_count; \ - data[i++] += sta->num_duplicates; \ - data[i++] += sta->rx_fragments; \ - data[i++] += sta->rx_dropped; \ - \ - data[i++] += sinfo.tx_packets; \ - data[i++] += sinfo.tx_bytes; \ - data[i++] += sta->tx_fragments; \ - data[i++] += sta->tx_filtered_count; \ - data[i++] += sta->tx_retry_failed; \ - data[i++] += sta->tx_retry_count; \ - data[i++] += sta->beacon_loss_count; \ - } while (0) - - /* For Managed stations, find the single station based on BSSID - * and use that. For interface types, iterate through all available - * stations and add stats for any station that is assigned to this - * network device. - */ - - mutex_lock(&local->sta_mtx); - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - sta = sta_info_get_bss(sdata, sdata->u.mgd.bssid); - - if (!(sta && !WARN_ON(sta->sdata->dev != dev))) - goto do_survey; - - sinfo.filled = 0; - sta_set_sinfo(sta, &sinfo); - - i = 0; - ADD_STA_STATS(sta); - - data[i++] = sta->sta_state; - - - if (sinfo.filled & STATION_INFO_TX_BITRATE) - data[i] = 100000 * - cfg80211_calculate_bitrate(&sinfo.txrate); - i++; - if (sinfo.filled & STATION_INFO_RX_BITRATE) - data[i] = 100000 * - cfg80211_calculate_bitrate(&sinfo.rxrate); - i++; - - if (sinfo.filled & STATION_INFO_SIGNAL_AVG) - data[i] = (u8)sinfo.signal_avg; - i++; - } else { - list_for_each_entry(sta, &local->sta_list, list) { - /* Make sure this station belongs to the proper dev */ - if (sta->sdata->dev != dev) - continue; - - sinfo.filled = 0; - sta_set_sinfo(sta, &sinfo); - i = 0; - ADD_STA_STATS(sta); - } - } - -do_survey: - i = STA_STATS_LEN - STA_STATS_SURVEY_LEN; - /* Get survey stats for current channel */ - survey.filled = 0; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (chanctx_conf) - channel = chanctx_conf->def.chan; - else - channel = NULL; - rcu_read_unlock(); - - if (channel) { - q = 0; - do { - survey.filled = 0; - if (drv_get_survey(local, q, &survey) != 0) { - survey.filled = 0; - break; - } - q++; - } while (channel != survey.channel); - } - - if (survey.filled) - data[i++] = survey.channel->center_freq; - else - data[i++] = 0; - if (survey.filled & SURVEY_INFO_NOISE_DBM) - data[i++] = (u8)survey.noise; - else - data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME) - data[i++] = survey.channel_time; - else - data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY) - data[i++] = survey.channel_time_busy; - else - data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) - data[i++] = survey.channel_time_ext_busy; - else - data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX) - data[i++] = survey.channel_time_rx; - else - data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX) - data[i++] = survey.channel_time_tx; - else - data[i++] = -1LL; - - mutex_unlock(&local->sta_mtx); - - if (WARN_ON(i != STA_STATS_LEN)) - return; - - drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN])); -} - -static void ieee80211_get_et_strings(struct wiphy *wiphy, - struct net_device *dev, - u32 sset, u8 *data) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int sz_sta_stats = 0; - - if (sset == ETH_SS_STATS) { - sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats); - memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats); - } - drv_get_et_strings(sdata, sset, &(data[sz_sta_stats])); -} - static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *mac, struct station_info *sinfo) + int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -798,7 +499,7 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_info *sinfo) + const u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -853,7 +554,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, } static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, - const u8 *resp, size_t resp_len) + const u8 *resp, size_t resp_len, + const struct ieee80211_csa_settings *csa) { struct probe_resp *new, *old; @@ -869,6 +571,11 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, new->len = resp_len; memcpy(new->data, resp, resp_len); + if (csa) + memcpy(new->csa_counter_offsets, csa->counter_offsets_presp, + csa->n_counter_offsets_presp * + sizeof(new->csa_counter_offsets[0])); + rcu_assign_pointer(sdata->u.ap.probe_resp, new); if (old) kfree_rcu(old, rcu_head); @@ -877,7 +584,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, } static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_beacon_data *params) + struct cfg80211_beacon_data *params, + const struct ieee80211_csa_settings *csa) { struct beacon_data *new, *old; int new_head_len, new_tail_len; @@ -921,6 +629,13 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, new->head_len = new_head_len; new->tail_len = new_tail_len; + if (csa) { + new->csa_current_counter = csa->count; + memcpy(new->csa_counter_offsets, csa->counter_offsets_beacon, + csa->n_counter_offsets_beacon * + sizeof(new->csa_counter_offsets[0])); + } + /* copy in head */ if (params->head) memcpy(new->head, params->head, new_head_len); @@ -935,7 +650,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, memcpy(new->tail, old->tail, new_tail_len); err = ieee80211_set_probe_resp(sdata, params->probe_resp, - params->probe_resp_len); + params->probe_resp_len, csa); if (err < 0) return err; if (err == 0) @@ -972,13 +687,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; mutex_lock(&local->mtx); - sdata->radar_required = params->radar_required; err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); + if (!err) + ieee80211_vif_copy_chanctx_to_vlans(sdata, false); mutex_unlock(&local->mtx); if (err) return err; - ieee80211_vif_copy_chanctx_to_vlans(sdata, false); /* * Apply control port protocol, this allows us to @@ -1020,7 +735,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; - err = ieee80211_assign_beacon(sdata, ¶ms->beacon); + err = ieee80211_assign_beacon(sdata, ¶ms->beacon, NULL); if (err < 0) { ieee80211_vif_release_channel(sdata); return err; @@ -1068,7 +783,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (!old) return -ENOENT; - err = ieee80211_assign_beacon(sdata, params); + err = ieee80211_assign_beacon(sdata, params, NULL); if (err < 0) return err; ieee80211_bss_info_change_notify(sdata, err); @@ -1092,7 +807,16 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata); /* abort any running channel switch */ + mutex_lock(&local->mtx); sdata->vif.csa_active = false; + if (sdata->csa_block_tx) { + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_block_tx = false; + } + + mutex_unlock(&local->mtx); + kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; @@ -1131,8 +855,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); skb_queue_purge(&sdata->u.ap.ps.bc_buf); - ieee80211_vif_copy_chanctx_to_vlans(sdata, true); mutex_lock(&local->mtx); + ieee80211_vif_copy_chanctx_to_vlans(sdata, true); ieee80211_vif_release_channel(sdata); mutex_unlock(&local->mtx); @@ -1273,9 +997,12 @@ static int sta_apply_parameters(struct ieee80211_local *local, } } - ret = sta_apply_auth_flags(local, sta, mask, set); - if (ret) - return ret; + /* auth flags will be set later for TDLS stations */ + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; + } if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) @@ -1412,11 +1139,19 @@ static int sta_apply_parameters(struct ieee80211_local *local, #endif } + /* set the STA state after all sta info from usermode has been set */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; + } + return 0; } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac, struct station_parameters *params) + const u8 *mac, + struct station_parameters *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; @@ -1450,6 +1185,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + } else { + sta->sta.tdls = true; } err = sta_apply_parameters(local, sta, params); @@ -1483,7 +1220,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, - u8 *mac) + const u8 *mac) { struct ieee80211_sub_if_data *sdata; @@ -1497,7 +1234,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, u8 *mac, + struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1566,7 +1303,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sta->sdata->u.vlan.sta) { - rcu_assign_pointer(sta->sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL); prev_4addr = true; } @@ -1622,7 +1359,7 @@ out_err: #ifdef CONFIG_MAC80211_MESH static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst, u8 *next_hop) + const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -1650,7 +1387,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, - u8 *dst) + const u8 *dst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1661,9 +1398,8 @@ static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_change_mpath(struct wiphy *wiphy, - struct net_device *dev, - u8 *dst, u8 *next_hop) +static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev, + const u8 *dst, const u8 *next_hop) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -1755,8 +1491,8 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, - int idx, u8 *dst, u8 *next_hop, - struct mpath_info *pinfo) + int idx, u8 *dst, u8 *next_hop, + struct mpath_info *pinfo) { struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; @@ -2930,7 +2666,6 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, /* whatever, but channel contexts should not complain about that one */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = local->rx_chains; - sdata->radar_required = true; err = ieee80211_vif_use_channel(sdata, chandef, IEEE80211_CHANCTX_SHARED); @@ -3011,62 +2746,106 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_finish); -static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, + u32 *changed) { - struct ieee80211_local *local = sdata->local; - int err, changed = 0; - - sdata_assert_lock(sdata); - - mutex_lock(&local->mtx); - sdata->radar_required = sdata->csa_radar_required; - err = ieee80211_vif_change_channel(sdata, &changed); - mutex_unlock(&local->mtx); - if (WARN_ON(err < 0)) - return; - - if (!local->use_chanctx) { - local->_oper_chandef = sdata->csa_chandef; - ieee80211_hw_config(local, 0); - } + int err; - sdata->vif.csa_active = false; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: - err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon); + err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, + NULL); kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; case NL80211_IFTYPE_ADHOC: err = ieee80211_ibss_finish_csa(sdata); if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata); if (err < 0) - return; - changed |= err; + return err; + *changed |= err; break; #endif default: WARN_ON(1); - return; + return -EINVAL; } - ieee80211_bss_info_change_notify(sdata, changed); + return 0; +} - ieee80211_wake_queues_by_reason(&sdata->local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); +static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u32 changed = 0; + int err; + + sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + /* + * using reservation isn't immediate as it may be deferred until later + * with multi-vif. once reservation is complete it will re-schedule the + * work with no reserved_chanctx so verify chandef to check if it + * completed successfully + */ + + if (sdata->reserved_chanctx) { + /* + * with multi-vif csa driver may call ieee80211_csa_finish() + * many times while waiting for other interfaces to use their + * reservations + */ + if (sdata->reserved_ready) + return 0; + + err = ieee80211_vif_use_reserved_context(sdata); + if (err) + return err; + + return 0; + } + + if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef, + &sdata->csa_chandef)) + return -EINVAL; + + sdata->vif.csa_active = false; + + err = ieee80211_set_after_csa_beacon(sdata, &changed); + if (err) + return err; + + ieee80211_bss_info_change_notify(sdata, changed); cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); + + if (sdata->csa_block_tx) { + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_block_tx = false; + } + + return 0; +} + +static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +{ + if (__ieee80211_csa_finalize(sdata)) { + sdata_info(sdata, "failed to finalize CSA, disconnecting\n"); + cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, + GFP_KERNEL); + } } void ieee80211_csa_finalize_work(struct work_struct *work) @@ -3074,8 +2853,12 @@ void ieee80211_csa_finalize_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, csa_finalize_work); + struct ieee80211_local *local = sdata->local; sdata_lock(sdata); + mutex_lock(&local->mtx); + mutex_lock(&local->chanctx_mtx); + /* AP might have been stopped while waiting for the lock. */ if (!sdata->vif.csa_active) goto unlock; @@ -3086,6 +2869,8 @@ void ieee80211_csa_finalize_work(struct work_struct *work) ieee80211_csa_finalize(sdata); unlock: + mutex_unlock(&local->chanctx_mtx); + mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -3093,6 +2878,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *params, u32 *changed) { + struct ieee80211_csa_settings csa = {}; int err; switch (sdata->vif.type) { @@ -3121,10 +2907,19 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, if (params->count <= 1) break; - sdata->csa_counter_offset_beacon = - params->counter_offset_beacon; - sdata->csa_counter_offset_presp = params->counter_offset_presp; - err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa); + if ((params->n_counter_offsets_beacon > + IEEE80211_MAX_CSA_COUNTERS_NUM) || + (params->n_counter_offsets_presp > + IEEE80211_MAX_CSA_COUNTERS_NUM)) + return -EINVAL; + + csa.counter_offsets_beacon = params->counter_offsets_beacon; + csa.counter_offsets_presp = params->counter_offsets_presp; + csa.n_counter_offsets_beacon = params->n_counter_offsets_beacon; + csa.n_counter_offsets_presp = params->n_counter_offsets_presp; + csa.count = params->count; + + err = ieee80211_assign_beacon(sdata, ¶ms->beacon_csa, &csa); if (err < 0) { kfree(sdata->u.ap.next_beacon); return err; @@ -3212,16 +3007,18 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_csa_settings *params) +static int +__ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; - int err, num_chanctx, changed = 0; + int err, changed = 0; sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; @@ -3233,45 +3030,51 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, &sdata->vif.bss_conf.chandef)) return -EINVAL; - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) { - rcu_read_unlock(); + /* don't allow another channel switch if one is already active. */ + if (sdata->vif.csa_active) return -EBUSY; - } - /* don't handle for multi-VIF cases */ - chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { - rcu_read_unlock(); - return -EBUSY; + mutex_lock(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + err = -EBUSY; + goto out; } - num_chanctx = 0; - list_for_each_entry_rcu(chanctx, &local->chanctx_list, list) - num_chanctx++; - rcu_read_unlock(); - - if (num_chanctx > 1) - return -EBUSY; - /* don't allow another channel switch if one is already active. */ - if (sdata->vif.csa_active) - return -EBUSY; + chanctx = container_of(conf, struct ieee80211_chanctx, conf); + if (!chanctx) { + err = -EBUSY; + goto out; + } - err = ieee80211_set_csa_beacon(sdata, params, &changed); + err = ieee80211_vif_reserve_chanctx(sdata, ¶ms->chandef, + chanctx->mode, + params->radar_required); if (err) - return err; + goto out; - sdata->csa_radar_required = params->radar_required; + /* if reservation is invalid then this will fail */ + err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0); + if (err) { + ieee80211_vif_unreserve_chanctx(sdata); + goto out; + } - if (params->block_tx) - ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); + err = ieee80211_set_csa_beacon(sdata, params, &changed); + if (err) { + ieee80211_vif_unreserve_chanctx(sdata); + goto out; + } sdata->csa_chandef = params->chandef; + sdata->csa_block_tx = params->block_tx; sdata->vif.csa_active = true; + if (sdata->csa_block_tx) + ieee80211_stop_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + if (changed) { ieee80211_bss_info_change_notify(sdata, changed); drv_channel_switch_beacon(sdata, ¶ms->chandef); @@ -3280,7 +3083,23 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, ieee80211_csa_finalize(sdata); } - return 0; +out: + mutex_unlock(&local->chanctx_mtx); + return err; +} + +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int err; + + mutex_lock(&local->mtx); + err = __ieee80211_channel_switch(wiphy, dev, params); + mutex_unlock(&local->mtx); + + return err; } static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, @@ -3295,6 +3114,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, bool need_offchan = false; u32 flags; int ret; + u8 *data; if (params->dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; @@ -3388,7 +3208,33 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, } skb_reserve(skb, local->hw.extra_tx_headroom); - memcpy(skb_put(skb, params->len), params->buf, params->len); + data = skb_put(skb, params->len); + memcpy(data, params->buf, params->len); + + /* Update CSA counters */ + if (sdata->vif.csa_active && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_ADHOC) && + params->n_csa_offsets) { + int i; + struct beacon_data *beacon = NULL; + + rcu_read_lock(); + + if (sdata->vif.type == NL80211_IFTYPE_AP) + beacon = rcu_dereference(sdata->u.ap.beacon); + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + beacon = rcu_dereference(sdata->u.ibss.presp); + else if (ieee80211_vif_is_mesh(&sdata->vif)) + beacon = rcu_dereference(sdata->u.mesh.beacon); + + if (beacon) + for (i = 0; i < params->n_csa_offsets; i++) + data[params->csa_offsets[i]] = + beacon->csa_current_counter; + + rcu_read_unlock(); + } IEEE80211_SKB_CB(skb)->flags = flags; @@ -3467,21 +3313,6 @@ static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) return drv_get_antenna(local, tx_ant, rx_ant); } -static int ieee80211_set_ringparam(struct wiphy *wiphy, u32 tx, u32 rx) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - - return drv_set_ringparam(local, tx, rx); -} - -static void ieee80211_get_ringparam(struct wiphy *wiphy, - u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - - drv_get_ringparam(local, tx, tx_max, rx, rx_max); -} - static int ieee80211_set_rekey_data(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_gtk_rekey_data *data) @@ -3497,320 +3328,6 @@ static int ieee80211_set_rekey_data(struct wiphy *wiphy, return 0; } -static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) -{ - u8 *pos = (void *)skb_put(skb, 7); - - *pos++ = WLAN_EID_EXT_CAPABILITY; - *pos++ = 5; /* len */ - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = 0x0; - *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; -} - -static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - u16 capab; - - capab = 0; - if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ) - return capab; - - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; - if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) - capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; - - return capab; -} - -static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, - u8 *peer, u8 *bssid) -{ - struct ieee80211_tdls_lnkie *lnkid; - - lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); - - lnkid->ie_type = WLAN_EID_LINK_ID; - lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; - - memcpy(lnkid->bssid, bssid, ETH_ALEN); - memcpy(lnkid->init_sta, src_addr, ETH_ALEN); - memcpy(lnkid->resp_sta, peer, ETH_ALEN); -} - -static int -ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, struct sk_buff *skb) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_tdls_data *tf; - - tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); - - memcpy(tf->da, peer, ETH_ALEN); - memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); - tf->ether_type = cpu_to_be16(ETH_P_TDLS); - tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; - - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_REQUEST; - - skb_put(skb, sizeof(tf->u.setup_req)); - tf->u.setup_req.dialog_token = dialog_token; - tf->u.setup_req.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - ieee80211_tdls_add_ext_capab(skb); - break; - case WLAN_TDLS_SETUP_RESPONSE: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_RESPONSE; - - skb_put(skb, sizeof(tf->u.setup_resp)); - tf->u.setup_resp.status_code = cpu_to_le16(status_code); - tf->u.setup_resp.dialog_token = dialog_token; - tf->u.setup_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - ieee80211_tdls_add_ext_capab(skb); - break; - case WLAN_TDLS_SETUP_CONFIRM: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_SETUP_CONFIRM; - - skb_put(skb, sizeof(tf->u.setup_cfm)); - tf->u.setup_cfm.status_code = cpu_to_le16(status_code); - tf->u.setup_cfm.dialog_token = dialog_token; - break; - case WLAN_TDLS_TEARDOWN: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_TEARDOWN; - - skb_put(skb, sizeof(tf->u.teardown)); - tf->u.teardown.reason_code = cpu_to_le16(status_code); - break; - case WLAN_TDLS_DISCOVERY_REQUEST: - tf->category = WLAN_CATEGORY_TDLS; - tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; - - skb_put(skb, sizeof(tf->u.discover_req)); - tf->u.discover_req.dialog_token = dialog_token; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int -ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, struct sk_buff *skb) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_mgmt *mgmt; - - mgmt = (void *)skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, peer, ETH_ALEN); - memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); - - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_ACTION); - - switch (action_code) { - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); - mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; - mgmt->u.action.u.tdls_discover_resp.action_code = - WLAN_PUB_ACTION_TDLS_DISCOVER_RES; - mgmt->u.action.u.tdls_discover_resp.dialog_token = - dialog_token; - mgmt->u.action.u.tdls_discover_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); - ieee80211_tdls_add_ext_capab(skb); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, u32 peer_capability, - const u8 *extra_ies, size_t extra_ies_len) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - struct sk_buff *skb = NULL; - bool send_direct; - int ret; - - if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) - return -ENOTSUPP; - - /* make sure we are in managed mode, and associated */ - if (sdata->vif.type != NL80211_IFTYPE_STATION || - !sdata->u.mgd.associated) - return -EINVAL; - - tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n", - action_code, peer); - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - max(sizeof(struct ieee80211_mgmt), - sizeof(struct ieee80211_tdls_data)) + - 50 + /* supported rates */ - 7 + /* ext capab */ - extra_ies_len + - sizeof(struct ieee80211_tdls_lnkie)); - if (!skb) - return -ENOMEM; - - skb_reserve(skb, local->hw.extra_tx_headroom); - - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - case WLAN_TDLS_SETUP_RESPONSE: - case WLAN_TDLS_SETUP_CONFIRM: - case WLAN_TDLS_TEARDOWN: - case WLAN_TDLS_DISCOVERY_REQUEST: - ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer, - action_code, dialog_token, - status_code, skb); - send_direct = false; - break; - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code, - dialog_token, status_code, - skb); - send_direct = true; - break; - default: - ret = -ENOTSUPP; - break; - } - - if (ret < 0) - goto fail; - - if (extra_ies_len) - memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len); - - /* the TDLS link IE is always added last */ - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - case WLAN_TDLS_SETUP_CONFIRM: - case WLAN_TDLS_TEARDOWN: - case WLAN_TDLS_DISCOVERY_REQUEST: - /* we are the initiator */ - ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer, - sdata->u.mgd.bssid); - break; - case WLAN_TDLS_SETUP_RESPONSE: - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - /* we are the responder */ - ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr, - sdata->u.mgd.bssid); - break; - default: - ret = -ENOTSUPP; - goto fail; - } - - if (send_direct) { - ieee80211_tx_skb(sdata, skb); - return 0; - } - - /* - * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise - * we should default to AC_VI. - */ - switch (action_code) { - case WLAN_TDLS_SETUP_REQUEST: - case WLAN_TDLS_SETUP_RESPONSE: - skb_set_queue_mapping(skb, IEEE80211_AC_BK); - skb->priority = 2; - break; - default: - skb_set_queue_mapping(skb, IEEE80211_AC_VI); - skb->priority = 5; - break; - } - - /* disable bottom halves when entering the Tx path */ - local_bh_disable(); - ret = ieee80211_subif_start_xmit(skb, dev); - local_bh_enable(); - - return ret; - -fail: - dev_kfree_skb(skb); - return ret; -} - -static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, - u8 *peer, enum nl80211_tdls_operation oper) -{ - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) - return -ENOTSUPP; - - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EINVAL; - - tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); - - switch (oper) { - case NL80211_TDLS_ENABLE_LINK: - rcu_read_lock(); - sta = sta_info_get(sdata, peer); - if (!sta) { - rcu_read_unlock(); - return -ENOLINK; - } - - set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); - rcu_read_unlock(); - break; - case NL80211_TDLS_DISABLE_LINK: - return sta_info_destroy_addr(sdata, peer); - case NL80211_TDLS_TEARDOWN: - case NL80211_TDLS_SETUP: - case NL80211_TDLS_DISCOVERY_REQ: - /* We don't support in-driver setup/teardown/discovery */ - return -ENOTSUPP; - default: - return -ENOTSUPP; - } - - return 0; -} - static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u64 *cookie) { @@ -3949,6 +3466,21 @@ static int ieee80211_set_qos_map(struct wiphy *wiphy, return 0; } +static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int ret; + u32 changed = 0; + + ret = ieee80211_vif_change_bandwidth(sdata, chandef, &changed); + if (ret == 0) + ieee80211_bss_info_change_notify(sdata, changed); + + return ret; +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -4012,8 +3544,6 @@ const struct cfg80211_ops mac80211_config_ops = { .mgmt_frame_register = ieee80211_mgmt_frame_register, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, - .set_ringparam = ieee80211_set_ringparam, - .get_ringparam = ieee80211_get_ringparam, .set_rekey_data = ieee80211_set_rekey_data, .tdls_oper = ieee80211_tdls_oper, .tdls_mgmt = ieee80211_tdls_mgmt, @@ -4022,11 +3552,9 @@ const struct cfg80211_ops mac80211_config_ops = { #ifdef CONFIG_PM .set_wakeup = ieee80211_set_wakeup, #endif - .get_et_sset_count = ieee80211_get_et_sset_count, - .get_et_stats = ieee80211_get_et_stats, - .get_et_strings = ieee80211_get_et_strings, .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, + .set_ap_chanwidth = ieee80211_set_ap_chanwidth, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 75b5dd2c9267..6d537f03c0ba 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -9,6 +9,187 @@ #include "ieee80211_i.h" #include "driver-ops.h" +static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_sub_if_data *sdata; + int num = 0; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list) + num++; + + return num; +} + +static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_sub_if_data *sdata; + int num = 0; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list) + num++; + + return num; +} + +int ieee80211_chanctx_refcount(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + return ieee80211_chanctx_num_assigned(local, ctx) + + ieee80211_chanctx_num_reserved(local, ctx); +} + +static int ieee80211_num_chanctx(struct ieee80211_local *local) +{ + struct ieee80211_chanctx *ctx; + int num = 0; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(ctx, &local->chanctx_list, list) + num++; + + return num; +} + +static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local) +{ + lockdep_assert_held(&local->chanctx_mtx); + return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local); +} + +static struct ieee80211_chanctx * +ieee80211_vif_get_chanctx(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local __maybe_unused = sdata->local; + struct ieee80211_chanctx_conf *conf; + + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) + return NULL; + + return container_of(conf, struct ieee80211_chanctx, conf); +} + +static const struct cfg80211_chan_def * +ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct cfg80211_chan_def *compat) +{ + struct ieee80211_sub_if_data *sdata; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(sdata, &ctx->reserved_vifs, + reserved_chanctx_list) { + if (!compat) + compat = &sdata->reserved_chandef; + + compat = cfg80211_chandef_compatible(&sdata->reserved_chandef, + compat); + if (!compat) + break; + } + + return compat; +} + +static const struct cfg80211_chan_def * +ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct cfg80211_chan_def *compat) +{ + struct ieee80211_sub_if_data *sdata; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(sdata, &ctx->assigned_vifs, + assigned_chanctx_list) { + if (sdata->reserved_chanctx != NULL) + continue; + + if (!compat) + compat = &sdata->vif.bss_conf.chandef; + + compat = cfg80211_chandef_compatible( + &sdata->vif.bss_conf.chandef, compat); + if (!compat) + break; + } + + return compat; +} + +static const struct cfg80211_chan_def * +ieee80211_chanctx_combined_chandef(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct cfg80211_chan_def *compat) +{ + lockdep_assert_held(&local->chanctx_mtx); + + compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat); + if (!compat) + return NULL; + + compat = ieee80211_chanctx_non_reserved_chandef(local, ctx, compat); + if (!compat) + return NULL; + + return compat; +} + +static bool +ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct cfg80211_chan_def *def) +{ + lockdep_assert_held(&local->chanctx_mtx); + + if (ieee80211_chanctx_combined_chandef(local, ctx, def)) + return true; + + if (!list_empty(&ctx->reserved_vifs) && + ieee80211_chanctx_reserved_chandef(local, ctx, def)) + return true; + + return false; +} + +static struct ieee80211_chanctx * +ieee80211_find_reservation_chanctx(struct ieee80211_local *local, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode) +{ + struct ieee80211_chanctx *ctx; + + lockdep_assert_held(&local->chanctx_mtx); + + if (mode == IEEE80211_CHANCTX_EXCLUSIVE) + return NULL; + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) + continue; + + if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) + continue; + + if (!ieee80211_chanctx_can_reserve_chandef(local, ctx, + chandef)) + continue; + + return ctx; + } + + return NULL; +} + static enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta) { switch (sta->bandwidth) { @@ -183,6 +364,9 @@ ieee80211_find_chanctx(struct ieee80211_local *local, list_for_each_entry(ctx, &local->chanctx_list, list) { const struct cfg80211_chan_def *compat; + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACE_NONE) + continue; + if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) continue; @@ -190,6 +374,11 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (!compat) continue; + compat = ieee80211_chanctx_reserved_chandef(local, ctx, + compat); + if (!compat) + continue; + ieee80211_change_chanctx(local, ctx, compat); return ctx; @@ -217,62 +406,91 @@ static bool ieee80211_is_radar_required(struct ieee80211_local *local) } static struct ieee80211_chanctx * -ieee80211_new_chanctx(struct ieee80211_local *local, - const struct cfg80211_chan_def *chandef, - enum ieee80211_chanctx_mode mode) +ieee80211_alloc_chanctx(struct ieee80211_local *local, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; - u32 changed; - int err; lockdep_assert_held(&local->chanctx_mtx); ctx = kzalloc(sizeof(*ctx) + local->hw.chanctx_data_size, GFP_KERNEL); if (!ctx) - return ERR_PTR(-ENOMEM); + return NULL; + INIT_LIST_HEAD(&ctx->assigned_vifs); + INIT_LIST_HEAD(&ctx->reserved_vifs); ctx->conf.def = *chandef; ctx->conf.rx_chains_static = 1; ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; ctx->conf.radar_enabled = ieee80211_is_radar_required(local); ieee80211_recalc_chanctx_min_def(local, ctx); + + return ctx; +} + +static int ieee80211_add_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + u32 changed; + int err; + + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; - /* we hold the mutex to prevent idle from changing */ - lockdep_assert_held(&local->mtx); /* turn idle off *before* setting channel -- some drivers need that */ changed = ieee80211_idle_off(local); if (changed) ieee80211_hw_config(local, changed); if (!local->use_chanctx) { - local->_oper_chandef = *chandef; + local->_oper_chandef = ctx->conf.def; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } else { err = drv_add_chanctx(local, ctx); if (err) { - kfree(ctx); ieee80211_recalc_idle(local); - return ERR_PTR(err); + return err; } } - /* and keep the mutex held until the new chanctx is on the list */ - list_add_rcu(&ctx->list, &local->chanctx_list); + return 0; +} + +static struct ieee80211_chanctx * +ieee80211_new_chanctx(struct ieee80211_local *local, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode) +{ + struct ieee80211_chanctx *ctx; + int err; + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + + ctx = ieee80211_alloc_chanctx(local, chandef, mode); + if (!ctx) + return ERR_PTR(-ENOMEM); + + err = ieee80211_add_chanctx(local, ctx); + if (err) { + kfree(ctx); + return ERR_PTR(err); + } + + list_add_rcu(&ctx->list, &local->chanctx_list); return ctx; } -static void ieee80211_free_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx) +static void ieee80211_del_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) { - bool check_single_channel = false; lockdep_assert_held(&local->chanctx_mtx); - WARN_ON_ONCE(ctx->refcount != 0); - if (!local->use_chanctx) { struct cfg80211_chan_def *chandef = &local->_oper_chandef; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; @@ -282,8 +500,9 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, /* NOTE: Disabling radar is only valid here for * single channel context. To be sure, check it ... */ - if (local->hw.conf.radar_enabled) - check_single_channel = true; + WARN_ON(local->hw.conf.radar_enabled && + !list_empty(&local->chanctx_list)); + local->hw.conf.radar_enabled = false; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); @@ -291,39 +510,19 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, drv_remove_chanctx(local, ctx); } - list_del_rcu(&ctx->list); - kfree_rcu(ctx, rcu_head); - - /* throw a warning if this wasn't the only channel context. */ - WARN_ON(check_single_channel && !list_empty(&local->chanctx_list)); - ieee80211_recalc_idle(local); } -static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, - struct ieee80211_chanctx *ctx) +static void ieee80211_free_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) { - struct ieee80211_local *local = sdata->local; - int ret; - lockdep_assert_held(&local->chanctx_mtx); - ret = drv_assign_vif_chanctx(local, sdata, ctx); - if (ret) - return ret; - - rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf); - ctx->refcount++; - - ieee80211_recalc_txpower(sdata); - ieee80211_recalc_chanctx_min_def(local, ctx); - sdata->vif.bss_conf.idle = false; - - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && - sdata->vif.type != NL80211_IFTYPE_MONITOR) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0); - return 0; + list_del_rcu(&ctx->list); + ieee80211_del_chanctx(local, ctx); + kfree_rcu(ctx, rcu_head); } static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, @@ -384,30 +583,58 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); } -static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, - struct ieee80211_chanctx *ctx) +static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, + struct ieee80211_chanctx *new_ctx) { struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *curr_ctx = NULL; + int ret = 0; - lockdep_assert_held(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); - ctx->refcount--; - rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); + if (conf) { + curr_ctx = container_of(conf, struct ieee80211_chanctx, conf); - sdata->vif.bss_conf.idle = true; + drv_unassign_vif_chanctx(local, sdata, curr_ctx); + conf = NULL; + list_del(&sdata->assigned_chanctx_list); + } - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && - sdata->vif.type != NL80211_IFTYPE_MONITOR) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + if (new_ctx) { + ret = drv_assign_vif_chanctx(local, sdata, new_ctx); + if (ret) + goto out; - drv_unassign_vif_chanctx(local, sdata, ctx); + conf = &new_ctx->conf; + list_add(&sdata->assigned_chanctx_list, + &new_ctx->assigned_vifs); + } - if (ctx->refcount > 0) { - ieee80211_recalc_chanctx_chantype(sdata->local, ctx); - ieee80211_recalc_smps_chanctx(local, ctx); - ieee80211_recalc_radar_chanctx(local, ctx); - ieee80211_recalc_chanctx_min_def(local, ctx); +out: + rcu_assign_pointer(sdata->vif.chanctx_conf, conf); + + sdata->vif.bss_conf.idle = !conf; + + if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) { + ieee80211_recalc_chanctx_chantype(local, curr_ctx); + ieee80211_recalc_smps_chanctx(local, curr_ctx); + ieee80211_recalc_radar_chanctx(local, curr_ctx); + ieee80211_recalc_chanctx_min_def(local, curr_ctx); } + + if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) { + ieee80211_recalc_txpower(sdata); + ieee80211_recalc_chanctx_min_def(local, new_ctx); + } + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_bss_info_change_notify(sdata, + BSS_CHANGED_IDLE); + + return ret; } static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) @@ -415,6 +642,7 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; + bool use_reserved_switch = false; lockdep_assert_held(&local->chanctx_mtx); @@ -425,9 +653,23 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) ctx = container_of(conf, struct ieee80211_chanctx, conf); - ieee80211_unassign_vif_chanctx(sdata, ctx); - if (ctx->refcount == 0) + if (sdata->reserved_chanctx) { + if (sdata->reserved_chanctx->replace_state == + IEEE80211_CHANCTX_REPLACES_OTHER && + ieee80211_chanctx_num_reserved(local, + sdata->reserved_chanctx) > 1) + use_reserved_switch = true; + + ieee80211_vif_unreserve_chanctx(sdata); + } + + ieee80211_assign_vif_chanctx(sdata, NULL); + if (ieee80211_chanctx_refcount(local, ctx) == 0) ieee80211_free_chanctx(local, ctx); + + /* Unreserving may ready an in-place reservation. */ + if (use_reserved_switch) + ieee80211_vif_use_reserved_switch(local); } void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, @@ -526,6 +768,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *ctx; + u8 radar_detect_width = 0; int ret; lockdep_assert_held(&local->mtx); @@ -533,6 +776,22 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev)); mutex_lock(&local->chanctx_mtx); + + ret = cfg80211_chandef_dfs_required(local->hw.wiphy, + chandef, + sdata->wdev.iftype); + if (ret < 0) + goto out; + if (ret > 0) + radar_detect_width = BIT(chandef->width); + + sdata->radar_required = ret; + + ret = ieee80211_check_combinations(sdata, chandef, mode, + radar_detect_width); + if (ret < 0) + goto out; + __ieee80211_vif_release_channel(sdata); ctx = ieee80211_find_chanctx(local, chandef, mode); @@ -548,7 +807,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, ret = ieee80211_assign_vif_chanctx(sdata, ctx); if (ret) { /* if assign fails refcount stays the same */ - if (ctx->refcount == 0) + if (ieee80211_chanctx_refcount(local, ctx) == 0) ieee80211_free_chanctx(local, ctx); goto out; } @@ -560,60 +819,769 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, return ret; } -int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, - u32 *changed) +static void +__ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear) { - struct ieee80211_local *local = sdata->local; + struct ieee80211_local *local __maybe_unused = sdata->local; + struct ieee80211_sub_if_data *vlan; struct ieee80211_chanctx_conf *conf; - struct ieee80211_chanctx *ctx; - const struct cfg80211_chan_def *chandef = &sdata->csa_chandef; - int ret; - u32 chanctx_changed = 0; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) + return; lockdep_assert_held(&local->mtx); - /* should never be called if not performing a channel switch. */ - if (WARN_ON(!sdata->vif.csa_active)) + /* Check that conf exists, even when clearing this function + * must be called with the AP's channel context still there + * as it would otherwise cause VLANs to have an invalid + * channel context pointer for a while, possibly pointing + * to a channel context that has already been freed. + */ + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + WARN_ON(!conf); + + if (clear) + conf = NULL; + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + rcu_assign_pointer(vlan->vif.chanctx_conf, conf); +} + +void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, + bool clear) +{ + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->chanctx_mtx); + + __ieee80211_vif_copy_chanctx_to_vlans(sdata, clear); + + mutex_unlock(&local->chanctx_mtx); +} + +int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_chanctx *ctx = sdata->reserved_chanctx; + + lockdep_assert_held(&sdata->local->chanctx_mtx); + + if (WARN_ON(!ctx)) return -EINVAL; - if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) + list_del(&sdata->reserved_chanctx_list); + sdata->reserved_chanctx = NULL; + + if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0) { + if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) { + if (WARN_ON(!ctx->replace_ctx)) + return -EINVAL; + + WARN_ON(ctx->replace_ctx->replace_state != + IEEE80211_CHANCTX_WILL_BE_REPLACED); + WARN_ON(ctx->replace_ctx->replace_ctx != ctx); + + ctx->replace_ctx->replace_ctx = NULL; + ctx->replace_ctx->replace_state = + IEEE80211_CHANCTX_REPLACE_NONE; + + list_del_rcu(&ctx->list); + kfree_rcu(ctx, rcu_head); + } else { + ieee80211_free_chanctx(sdata->local, ctx); + } + } + + return 0; +} + +int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode, + bool radar_required) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx *new_ctx, *curr_ctx, *ctx; + + lockdep_assert_held(&local->chanctx_mtx); + + curr_ctx = ieee80211_vif_get_chanctx(sdata); + if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx) + return -ENOTSUPP; + + new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode); + if (!new_ctx) { + if (ieee80211_can_create_new_chanctx(local)) { + new_ctx = ieee80211_new_chanctx(local, chandef, mode); + if (IS_ERR(new_ctx)) + return PTR_ERR(new_ctx); + } else { + if (!curr_ctx || + (curr_ctx->replace_state == + IEEE80211_CHANCTX_WILL_BE_REPLACED) || + !list_empty(&curr_ctx->reserved_vifs)) { + /* + * Another vif already requested this context + * for a reservation. Find another one hoping + * all vifs assigned to it will also switch + * soon enough. + * + * TODO: This needs a little more work as some + * cases (more than 2 chanctx capable devices) + * may fail which could otherwise succeed + * provided some channel context juggling was + * performed. + * + * Consider ctx1..3, vif1..6, each ctx has 2 + * vifs. vif1 and vif2 from ctx1 request new + * different chandefs starting 2 in-place + * reserations with ctx4 and ctx5 replacing + * ctx1 and ctx2 respectively. Next vif5 and + * vif6 from ctx3 reserve ctx4. If vif3 and + * vif4 remain on ctx2 as they are then this + * fails unless `replace_ctx` from ctx5 is + * replaced with ctx3. + */ + list_for_each_entry(ctx, &local->chanctx_list, + list) { + if (ctx->replace_state != + IEEE80211_CHANCTX_REPLACE_NONE) + continue; + + if (!list_empty(&ctx->reserved_vifs)) + continue; + + curr_ctx = ctx; + break; + } + } + + /* + * If that's true then all available contexts already + * have reservations and cannot be used. + */ + if (!curr_ctx || + (curr_ctx->replace_state == + IEEE80211_CHANCTX_WILL_BE_REPLACED) || + !list_empty(&curr_ctx->reserved_vifs)) + return -EBUSY; + + new_ctx = ieee80211_alloc_chanctx(local, chandef, mode); + if (!new_ctx) + return -ENOMEM; + + new_ctx->replace_ctx = curr_ctx; + new_ctx->replace_state = + IEEE80211_CHANCTX_REPLACES_OTHER; + + curr_ctx->replace_ctx = new_ctx; + curr_ctx->replace_state = + IEEE80211_CHANCTX_WILL_BE_REPLACED; + + list_add_rcu(&new_ctx->list, &local->chanctx_list); + } + } + + list_add(&sdata->reserved_chanctx_list, &new_ctx->reserved_vifs); + sdata->reserved_chanctx = new_ctx; + sdata->reserved_chandef = *chandef; + sdata->reserved_radar_required = radar_required; + sdata->reserved_ready = false; + + return 0; +} + +static void +ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata) +{ + switch (sdata->vif.type) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_MESH_POINT: + ieee80211_queue_work(&sdata->local->hw, + &sdata->csa_finalize_work); + break; + case NL80211_IFTYPE_STATION: + ieee80211_queue_work(&sdata->local->hw, + &sdata->u.mgd.chswitch_work); + break; + case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_P2P_DEVICE: + case NUM_NL80211_IFTYPES: + WARN_ON(1); + break; + } +} + +static int +ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_vif_chanctx_switch vif_chsw[1] = {}; + struct ieee80211_chanctx *old_ctx, *new_ctx; + const struct cfg80211_chan_def *chandef; + u32 changed = 0; + int err; + + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + + new_ctx = sdata->reserved_chanctx; + old_ctx = ieee80211_vif_get_chanctx(sdata); + + if (WARN_ON(!sdata->reserved_ready)) + return -EBUSY; + + if (WARN_ON(!new_ctx)) return -EINVAL; - mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (!conf) { - ret = -EINVAL; + if (WARN_ON(!old_ctx)) + return -EINVAL; + + if (WARN_ON(new_ctx->replace_state == + IEEE80211_CHANCTX_REPLACES_OTHER)) + return -EINVAL; + + chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx, + &sdata->reserved_chandef); + if (WARN_ON(!chandef)) + return -EINVAL; + + vif_chsw[0].vif = &sdata->vif; + vif_chsw[0].old_ctx = &old_ctx->conf; + vif_chsw[0].new_ctx = &new_ctx->conf; + + list_del(&sdata->reserved_chanctx_list); + sdata->reserved_chanctx = NULL; + + err = drv_switch_vif_chanctx(local, vif_chsw, 1, + CHANCTX_SWMODE_REASSIGN_VIF); + if (err) { + if (ieee80211_chanctx_refcount(local, new_ctx) == 0) + ieee80211_free_chanctx(local, new_ctx); + goto out; } - ctx = container_of(conf, struct ieee80211_chanctx, conf); - if (ctx->refcount != 1) { - ret = -EINVAL; + list_move(&sdata->assigned_chanctx_list, &new_ctx->assigned_vifs); + rcu_assign_pointer(sdata->vif.chanctx_conf, &new_ctx->conf); + + if (sdata->vif.type == NL80211_IFTYPE_AP) + __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + + if (ieee80211_chanctx_refcount(local, old_ctx) == 0) + ieee80211_free_chanctx(local, old_ctx); + + if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width) + changed = BSS_CHANGED_BANDWIDTH; + + sdata->vif.bss_conf.chandef = sdata->reserved_chandef; + + if (changed) + ieee80211_bss_info_change_notify(sdata, changed); + +out: + ieee80211_vif_chanctx_reservation_complete(sdata); + return err; +} + +static int +ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx *old_ctx, *new_ctx; + const struct cfg80211_chan_def *chandef; + int err; + + old_ctx = ieee80211_vif_get_chanctx(sdata); + new_ctx = sdata->reserved_chanctx; + + if (WARN_ON(!sdata->reserved_ready)) + return -EINVAL; + + if (WARN_ON(old_ctx)) + return -EINVAL; + + if (WARN_ON(!new_ctx)) + return -EINVAL; + + if (WARN_ON(new_ctx->replace_state == + IEEE80211_CHANCTX_REPLACES_OTHER)) + return -EINVAL; + + chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx, + &sdata->reserved_chandef); + if (WARN_ON(!chandef)) + return -EINVAL; + + list_del(&sdata->reserved_chanctx_list); + sdata->reserved_chanctx = NULL; + + err = ieee80211_assign_vif_chanctx(sdata, new_ctx); + if (err) { + if (ieee80211_chanctx_refcount(local, new_ctx) == 0) + ieee80211_free_chanctx(local, new_ctx); + goto out; } - if (sdata->vif.bss_conf.chandef.width != chandef->width) { - chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH; - *changed |= BSS_CHANGED_BANDWIDTH; +out: + ieee80211_vif_chanctx_reservation_complete(sdata); + return err; +} + +static bool +ieee80211_vif_has_in_place_reservation(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_chanctx *old_ctx, *new_ctx; + + lockdep_assert_held(&sdata->local->chanctx_mtx); + + new_ctx = sdata->reserved_chanctx; + old_ctx = ieee80211_vif_get_chanctx(sdata); + + if (!old_ctx) + return false; + + if (WARN_ON(!new_ctx)) + return false; + + if (old_ctx->replace_state != IEEE80211_CHANCTX_WILL_BE_REPLACED) + return false; + + if (new_ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + return false; + + return true; +} + +static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local, + struct ieee80211_chanctx *new_ctx) +{ + const struct cfg80211_chan_def *chandef; + + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + + chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL); + if (WARN_ON(!chandef)) + return -EINVAL; + + local->hw.conf.radar_enabled = new_ctx->conf.radar_enabled; + local->_oper_chandef = *chandef; + ieee80211_hw_config(local, 0); + + return 0; +} + +static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local, + int n_vifs) +{ + struct ieee80211_vif_chanctx_switch *vif_chsw; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_chanctx *ctx, *old_ctx; + int i, err; + + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + + vif_chsw = kzalloc(sizeof(vif_chsw[0]) * n_vifs, GFP_KERNEL); + if (!vif_chsw) + return -ENOMEM; + + i = 0; + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + continue; + + if (WARN_ON(!ctx->replace_ctx)) { + err = -EINVAL; + goto out; + } + + list_for_each_entry(sdata, &ctx->reserved_vifs, + reserved_chanctx_list) { + if (!ieee80211_vif_has_in_place_reservation( + sdata)) + continue; + + old_ctx = ieee80211_vif_get_chanctx(sdata); + vif_chsw[i].vif = &sdata->vif; + vif_chsw[i].old_ctx = &old_ctx->conf; + vif_chsw[i].new_ctx = &ctx->conf; + + i++; + } + } + + err = drv_switch_vif_chanctx(local, vif_chsw, n_vifs, + CHANCTX_SWMODE_SWAP_CONTEXTS); + +out: + kfree(vif_chsw); + return err; +} + +static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local) +{ + struct ieee80211_chanctx *ctx; + int err; + + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + continue; + + if (!list_empty(&ctx->replace_ctx->assigned_vifs)) + continue; + + ieee80211_del_chanctx(local, ctx->replace_ctx); + err = ieee80211_add_chanctx(local, ctx); + if (err) + goto err; + } + + return 0; + +err: + WARN_ON(ieee80211_add_chanctx(local, ctx)); + list_for_each_entry_continue_reverse(ctx, &local->chanctx_list, list) { + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + continue; + + if (!list_empty(&ctx->replace_ctx->assigned_vifs)) + continue; + + ieee80211_del_chanctx(local, ctx); + WARN_ON(ieee80211_add_chanctx(local, ctx->replace_ctx)); } - sdata->vif.bss_conf.chandef = *chandef; - ctx->conf.def = *chandef; + return err; +} - chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL; - drv_change_chanctx(local, ctx, chanctx_changed); +int +ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata, *sdata_tmp; + struct ieee80211_chanctx *ctx, *ctx_tmp, *old_ctx; + struct ieee80211_chanctx *new_ctx = NULL; + int i, err, n_assigned, n_reserved, n_ready; + int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0; - ieee80211_recalc_chanctx_chantype(local, ctx); - ieee80211_recalc_smps_chanctx(local, ctx); - ieee80211_recalc_radar_chanctx(local, ctx); - ieee80211_recalc_chanctx_min_def(local, ctx); + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); - ret = 0; - out: - mutex_unlock(&local->chanctx_mtx); - return ret; + /* + * If there are 2 independent pairs of channel contexts performing + * cross-switch of their vifs this code will still wait until both are + * ready even though it could be possible to switch one before the + * other is ready. + * + * For practical reasons and code simplicity just do a single huge + * switch. + */ + + /* + * Verify if the reservation is still feasible. + * - if it's not then disconnect + * - if it is but not all vifs necessary are ready then defer + */ + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + continue; + + if (WARN_ON(!ctx->replace_ctx)) { + err = -EINVAL; + goto err; + } + + if (!local->use_chanctx) + new_ctx = ctx; + + n_ctx++; + + n_assigned = 0; + n_reserved = 0; + n_ready = 0; + + list_for_each_entry(sdata, &ctx->replace_ctx->assigned_vifs, + assigned_chanctx_list) { + n_assigned++; + if (sdata->reserved_chanctx) { + n_reserved++; + if (sdata->reserved_ready) + n_ready++; + } + } + + if (n_assigned != n_reserved) { + if (n_ready == n_reserved) { + wiphy_info(local->hw.wiphy, + "channel context reservation cannot be finalized because some interfaces aren't switching\n"); + err = -EBUSY; + goto err; + } + + return -EAGAIN; + } + + ctx->conf.radar_enabled = false; + list_for_each_entry(sdata, &ctx->reserved_vifs, + reserved_chanctx_list) { + if (ieee80211_vif_has_in_place_reservation(sdata) && + !sdata->reserved_ready) + return -EAGAIN; + + old_ctx = ieee80211_vif_get_chanctx(sdata); + if (old_ctx) { + if (old_ctx->replace_state == + IEEE80211_CHANCTX_WILL_BE_REPLACED) + n_vifs_switch++; + else + n_vifs_assign++; + } else { + n_vifs_ctxless++; + } + + if (sdata->reserved_radar_required) + ctx->conf.radar_enabled = true; + } + } + + if (WARN_ON(n_ctx == 0) || + WARN_ON(n_vifs_switch == 0 && + n_vifs_assign == 0 && + n_vifs_ctxless == 0) || + WARN_ON(n_ctx > 1 && !local->use_chanctx) || + WARN_ON(!new_ctx && !local->use_chanctx)) { + err = -EINVAL; + goto err; + } + + /* + * All necessary vifs are ready. Perform the switch now depending on + * reservations and driver capabilities. + */ + + if (local->use_chanctx) { + if (n_vifs_switch > 0) { + err = ieee80211_chsw_switch_vifs(local, n_vifs_switch); + if (err) + goto err; + } + + if (n_vifs_assign > 0 || n_vifs_ctxless > 0) { + err = ieee80211_chsw_switch_ctxs(local); + if (err) + goto err; + } + } else { + err = ieee80211_chsw_switch_hwconf(local, new_ctx); + if (err) + goto err; + } + + /* + * Update all structures, values and pointers to point to new channel + * context(s). + */ + + i = 0; + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + continue; + + if (WARN_ON(!ctx->replace_ctx)) { + err = -EINVAL; + goto err; + } + + list_for_each_entry(sdata, &ctx->reserved_vifs, + reserved_chanctx_list) { + u32 changed = 0; + + if (!ieee80211_vif_has_in_place_reservation(sdata)) + continue; + + rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf); + + if (sdata->vif.type == NL80211_IFTYPE_AP) + __ieee80211_vif_copy_chanctx_to_vlans(sdata, + false); + + sdata->radar_required = sdata->reserved_radar_required; + + if (sdata->vif.bss_conf.chandef.width != + sdata->reserved_chandef.width) + changed = BSS_CHANGED_BANDWIDTH; + + sdata->vif.bss_conf.chandef = sdata->reserved_chandef; + if (changed) + ieee80211_bss_info_change_notify(sdata, + changed); + + ieee80211_recalc_txpower(sdata); + } + + ieee80211_recalc_chanctx_chantype(local, ctx); + ieee80211_recalc_smps_chanctx(local, ctx); + ieee80211_recalc_radar_chanctx(local, ctx); + ieee80211_recalc_chanctx_min_def(local, ctx); + + list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs, + reserved_chanctx_list) { + if (ieee80211_vif_get_chanctx(sdata) != ctx) + continue; + + list_del(&sdata->reserved_chanctx_list); + list_move(&sdata->assigned_chanctx_list, + &new_ctx->assigned_vifs); + sdata->reserved_chanctx = NULL; + + ieee80211_vif_chanctx_reservation_complete(sdata); + } + + /* + * This context might have been a dependency for an already + * ready re-assign reservation interface that was deferred. Do + * not propagate error to the caller though. The in-place + * reservation for originally requested interface has already + * succeeded at this point. + */ + list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs, + reserved_chanctx_list) { + if (WARN_ON(ieee80211_vif_has_in_place_reservation( + sdata))) + continue; + + if (WARN_ON(sdata->reserved_chanctx != ctx)) + continue; + + if (!sdata->reserved_ready) + continue; + + if (ieee80211_vif_get_chanctx(sdata)) + err = ieee80211_vif_use_reserved_reassign( + sdata); + else + err = ieee80211_vif_use_reserved_assign(sdata); + + if (err) { + sdata_info(sdata, + "failed to finalize (re-)assign reservation (err=%d)\n", + err); + ieee80211_vif_unreserve_chanctx(sdata); + cfg80211_stop_iface(local->hw.wiphy, + &sdata->wdev, + GFP_KERNEL); + } + } + } + + /* + * Finally free old contexts + */ + + list_for_each_entry_safe(ctx, ctx_tmp, &local->chanctx_list, list) { + if (ctx->replace_state != IEEE80211_CHANCTX_WILL_BE_REPLACED) + continue; + + ctx->replace_ctx->replace_ctx = NULL; + ctx->replace_ctx->replace_state = + IEEE80211_CHANCTX_REPLACE_NONE; + + list_del_rcu(&ctx->list); + kfree_rcu(ctx, rcu_head); + } + + return 0; + +err: + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + continue; + + list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs, + reserved_chanctx_list) { + ieee80211_vif_unreserve_chanctx(sdata); + ieee80211_vif_chanctx_reservation_complete(sdata); + } + } + + return err; +} + +int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx *new_ctx; + struct ieee80211_chanctx *old_ctx; + int err; + + lockdep_assert_held(&local->mtx); + lockdep_assert_held(&local->chanctx_mtx); + + new_ctx = sdata->reserved_chanctx; + old_ctx = ieee80211_vif_get_chanctx(sdata); + + if (WARN_ON(!new_ctx)) + return -EINVAL; + + if (WARN_ON(new_ctx->replace_state == + IEEE80211_CHANCTX_WILL_BE_REPLACED)) + return -EINVAL; + + if (WARN_ON(sdata->reserved_ready)) + return -EINVAL; + + sdata->reserved_ready = true; + + if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) { + if (old_ctx) + err = ieee80211_vif_use_reserved_reassign(sdata); + else + err = ieee80211_vif_use_reserved_assign(sdata); + + if (err) + return err; + } + + /* + * In-place reservation may need to be finalized now either if: + * a) sdata is taking part in the swapping itself and is the last one + * b) sdata has switched with a re-assign reservation to an existing + * context readying in-place switching of old_ctx + * + * In case of (b) do not propagate the error up because the requested + * sdata already switched successfully. Just spill an extra warning. + * The ieee80211_vif_use_reserved_switch() already stops all necessary + * interfaces upon failure. + */ + if ((old_ctx && + old_ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) || + new_ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) { + err = ieee80211_vif_use_reserved_switch(local); + if (err && err != -EAGAIN) { + if (new_ctx->replace_state == + IEEE80211_CHANCTX_REPLACES_OTHER) + return err; + + wiphy_info(local->hw.wiphy, + "depending in-place reservation failed (err=%d)\n", + err); + } + } + + return 0; } int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, @@ -623,6 +1591,7 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; + const struct cfg80211_chan_def *compat; int ret; if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, @@ -649,11 +1618,33 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, } ctx = container_of(conf, struct ieee80211_chanctx, conf); - if (!cfg80211_chandef_compatible(&conf->def, chandef)) { + + compat = cfg80211_chandef_compatible(&conf->def, chandef); + if (!compat) { ret = -EINVAL; goto out; } + switch (ctx->replace_state) { + case IEEE80211_CHANCTX_REPLACE_NONE: + if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat)) { + ret = -EBUSY; + goto out; + } + break; + case IEEE80211_CHANCTX_WILL_BE_REPLACED: + /* TODO: Perhaps the bandwith change could be treated as a + * reservation itself? */ + ret = -EBUSY; + goto out; + case IEEE80211_CHANCTX_REPLACES_OTHER: + /* channel context that is going to replace another channel + * context doesn't really exist and shouldn't be assigned + * anywhere yet */ + WARN_ON(1); + break; + } + sdata->vif.bss_conf.chandef = *chandef; ieee80211_recalc_chanctx_chantype(local, ctx); @@ -695,40 +1686,6 @@ void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->chanctx_mtx); } -void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, - bool clear) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_sub_if_data *vlan; - struct ieee80211_chanctx_conf *conf; - - ASSERT_RTNL(); - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) - return; - - mutex_lock(&local->chanctx_mtx); - - /* - * Check that conf exists, even when clearing this function - * must be called with the AP's channel context still there - * as it would otherwise cause VLANs to have an invalid - * channel context pointer for a while, possibly pointing - * to a channel context that has already been freed. - */ - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - WARN_ON(!conf); - - if (clear) - conf = NULL; - - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - rcu_assign_pointer(vlan->vif.chanctx_conf, conf); - - mutex_unlock(&local->chanctx_mtx); -} - void ieee80211_iter_chan_contexts_atomic( struct ieee80211_hw *hw, void (*iter)(struct ieee80211_hw *hw, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index fa16e54980a1..0e963bc1ceac 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -128,7 +128,7 @@ static ssize_t sta_tx_latency_stat_write(struct file *file, if (!strcmp(buf, TX_LATENCY_DISABLED)) { if (!tx_latency) goto unlock; - rcu_assign_pointer(local->tx_latency, NULL); + RCU_INIT_POINTER(local->tx_latency, NULL); synchronize_rcu(); kfree(tx_latency); goto unlock; diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h index 214ed4ecd739..60c35afee29d 100644 --- a/net/mac80211/debugfs.h +++ b/net/mac80211/debugfs.h @@ -1,6 +1,8 @@ #ifndef __MAC80211_DEBUGFS_H #define __MAC80211_DEBUGFS_H +#include "ieee80211_i.h" + #ifdef CONFIG_MAC80211_DEBUGFS void debugfs_hw_add(struct ieee80211_local *local); int __printf(4, 5) mac80211_format_buffer(char __user *userbuf, size_t count, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 40a648938985..e205ebabfa50 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -34,8 +34,7 @@ static ssize_t ieee80211_if_read( ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (sdata->dev->reg_state == NETREG_REGISTERED) - ret = (*format)(sdata, buf, sizeof(buf)); + ret = (*format)(sdata, buf, sizeof(buf)); read_unlock(&dev_base_lock); if (ret >= 0) @@ -62,8 +61,7 @@ static ssize_t ieee80211_if_write( ret = -ENODEV; rtnl_lock(); - if (sdata->dev->reg_state == NETREG_REGISTERED) - ret = (*write)(sdata, buf, count); + ret = (*write)(sdata, buf, count); rtnl_unlock(); return ret; diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index 79025e79f4d6..9f5501a9a795 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -3,6 +3,8 @@ #ifndef __IEEE80211_DEBUGFS_NETDEV_H #define __IEEE80211_DEBUGFS_NETDEV_H +#include "ieee80211_i.h" + #ifdef CONFIG_MAC80211_DEBUGFS void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 2ecb4deddb5d..3db96648b45a 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -124,7 +124,7 @@ static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf, long connected_time_secs; char buf[100]; int res; - do_posix_clock_monotonic_gettime(&uptime); + ktime_get_ts(&uptime); connected_time_secs = uptime.tv_sec - sta->last_connected; time_to_tm(connected_time_secs, 0, &result); result.tm_year -= 70; @@ -587,7 +587,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed); DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); - DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count); if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index fc689f5d971e..11423958116a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -5,11 +5,11 @@ #include "ieee80211_i.h" #include "trace.h" -static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) +static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) { - WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), - "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", - sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); + return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), + "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", + sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); } static inline struct ieee80211_sub_if_data * @@ -168,7 +168,8 @@ static inline int drv_change_interface(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_change_interface(local, sdata, type, p2p); ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p); @@ -181,7 +182,8 @@ static inline void drv_remove_interface(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); @@ -219,7 +221,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, sdata->vif.type == NL80211_IFTYPE_MONITOR)) return; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_bss_info_changed(local, sdata, info, changed); if (local->ops->bss_info_changed) @@ -278,7 +281,8 @@ static inline int drv_set_key(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_set_key(local, cmd, sdata, sta, key); ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); @@ -298,7 +302,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, ista = &sta->sta; sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); if (local->ops->update_tkip_key) @@ -309,13 +314,14 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, static inline int drv_hw_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct cfg80211_scan_request *req) + struct ieee80211_scan_request *req) { int ret; might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_hw_scan(local, sdata); ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); @@ -328,7 +334,8 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_cancel_hw_scan(local, sdata); local->ops->cancel_hw_scan(&local->hw, &sdata->vif); @@ -339,13 +346,14 @@ static inline int drv_sched_scan_start(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req, - struct ieee80211_sched_scan_ies *ies) + struct ieee80211_scan_ies *ies) { int ret; might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sched_scan_start(local, sdata); ret = local->ops->sched_scan_start(&local->hw, &sdata->vif, @@ -361,7 +369,8 @@ static inline int drv_sched_scan_stop(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sched_scan_stop(local, sdata); ret = local->ops->sched_scan_stop(&local->hw, &sdata->vif); @@ -462,7 +471,8 @@ static inline void drv_sta_notify(struct ieee80211_local *local, struct ieee80211_sta *sta) { sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_notify(local, sdata, cmd, sta); if (local->ops->sta_notify) @@ -479,7 +489,8 @@ static inline int drv_sta_add(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sta_add(local, sdata, sta); if (local->ops->sta_add) @@ -497,7 +508,8 @@ static inline void drv_sta_remove(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_remove(local, sdata, sta); if (local->ops->sta_remove) @@ -515,7 +527,8 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; if (local->ops->sta_add_debugfs) local->ops->sta_add_debugfs(&local->hw, &sdata->vif, @@ -545,7 +558,8 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta); if (local->ops->sta_pre_rcu_remove) @@ -566,7 +580,8 @@ int drv_sta_state(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state); if (local->ops->sta_state) { @@ -590,7 +605,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local, struct ieee80211_sta *sta, u32 changed) { sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && (sdata->vif.type != NL80211_IFTYPE_ADHOC && @@ -612,7 +628,8 @@ static inline int drv_conf_tx(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_conf_tx(local, sdata, ac, params); if (local->ops->conf_tx) @@ -629,7 +646,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return ret; trace_drv_get_tsf(local, sdata); if (local->ops->get_tsf) @@ -644,7 +662,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_set_tsf(local, sdata, tsf); if (local->ops->set_tsf) @@ -657,7 +676,8 @@ static inline void drv_reset_tsf(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_reset_tsf(local, sdata); if (local->ops->reset_tsf) @@ -689,7 +709,8 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); @@ -726,13 +747,19 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) } static inline void drv_flush(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, u32 queues, bool drop) { + struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL; + might_sleep(); + if (sdata && !check_sdata_in_driver(sdata)) + return; + trace_drv_flush(local, queues, drop); if (local->ops->flush) - local->ops->flush(&local->hw, queues, drop); + local->ops->flush(&local->hw, vif, queues, drop); trace_drv_return_void(local); } @@ -848,7 +875,8 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local, might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_set_bitrate_mask(local, sdata, mask); if (local->ops->set_bitrate_mask) @@ -863,7 +891,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_gtk_rekey_data *data) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_set_rekey_data(local, sdata, data); if (local->ops->set_rekey_data) @@ -931,7 +960,8 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); trace_drv_mgd_prepare_tx(local, sdata); @@ -940,6 +970,22 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void +drv_mgd_protect_tdls_discover(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + + trace_drv_mgd_protect_tdls_discover(local, sdata); + if (local->ops->mgd_protect_tdls_discover) + local->ops->mgd_protect_tdls_discover(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} + static inline int drv_add_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { @@ -958,6 +1004,9 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, static inline void drv_remove_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { + if (WARN_ON(!ctx->driver_present)) + return; + trace_drv_remove_chanctx(local, ctx); if (local->ops->remove_chanctx) local->ops->remove_chanctx(&local->hw, &ctx->conf); @@ -983,7 +1032,8 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, { int ret = 0; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_assign_vif_chanctx(local, sdata, ctx); if (local->ops->assign_vif_chanctx) { @@ -1001,7 +1051,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *ctx) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_unassign_vif_chanctx(local, sdata, ctx); if (local->ops->unassign_vif_chanctx) { @@ -1013,12 +1064,66 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline int +drv_switch_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, + enum ieee80211_chanctx_switch_mode mode) +{ + int ret = 0; + int i; + + if (!local->ops->switch_vif_chanctx) + return -EOPNOTSUPP; + + for (i = 0; i < n_vifs; i++) { + struct ieee80211_chanctx *new_ctx = + container_of(vifs[i].new_ctx, + struct ieee80211_chanctx, + conf); + struct ieee80211_chanctx *old_ctx = + container_of(vifs[i].old_ctx, + struct ieee80211_chanctx, + conf); + + WARN_ON_ONCE(!old_ctx->driver_present); + WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS && + new_ctx->driver_present) || + (mode == CHANCTX_SWMODE_REASSIGN_VIF && + !new_ctx->driver_present)); + } + + trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode); + ret = local->ops->switch_vif_chanctx(&local->hw, + vifs, n_vifs, mode); + trace_drv_return_int(local, ret); + + if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) { + for (i = 0; i < n_vifs; i++) { + struct ieee80211_chanctx *new_ctx = + container_of(vifs[i].new_ctx, + struct ieee80211_chanctx, + conf); + struct ieee80211_chanctx *old_ctx = + container_of(vifs[i].old_ctx, + struct ieee80211_chanctx, + conf); + + new_ctx->driver_present = true; + old_ctx->driver_present = false; + } + } + + return ret; +} + static inline int drv_start_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { int ret = 0; - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf); if (local->ops->start_ap) @@ -1030,7 +1135,8 @@ static inline int drv_start_ap(struct ieee80211_local *local, static inline void drv_stop_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_stop_ap(local, sdata); if (local->ops->stop_ap) @@ -1053,7 +1159,8 @@ drv_set_default_unicast_key(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int key_idx) { - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; WARN_ON_ONCE(key_idx < -1 || key_idx > 3); @@ -1095,7 +1202,8 @@ static inline int drv_join_ibss(struct ieee80211_local *local, int ret = 0; might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf); if (local->ops->join_ibss) @@ -1108,7 +1216,8 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); - check_sdata_in_driver(sdata); + if (!check_sdata_in_driver(sdata)) + return; trace_drv_leave_ibss(local, sdata); if (local->ops->leave_ibss) @@ -1116,4 +1225,17 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, + struct ieee80211_sta *sta) +{ + u32 ret = 0; + + trace_drv_get_expected_throughput(sta); + if (local->ops->get_expected_throughput) + ret = local->ops->get_expected_throughput(sta); + trace_drv_return_u32(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c new file mode 100644 index 000000000000..ebfc8091557b --- /dev/null +++ b/net/mac80211/ethtool.c @@ -0,0 +1,244 @@ +/* + * mac80211 ethtool hooks for cfg80211 + * + * Copied from cfg.c - originally + * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2014 Intel Corporation (Author: Johannes Berg) + * + * This file is GPLv2 as found in COPYING. + */ +#include <linux/types.h> +#include <net/cfg80211.h> +#include "ieee80211_i.h" +#include "sta_info.h" +#include "driver-ops.h" + +static int ieee80211_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *rp) +{ + struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy); + + if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) + return -EINVAL; + + return drv_set_ringparam(local, rp->tx_pending, rp->rx_pending); +} + +static void ieee80211_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *rp) +{ + struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy); + + memset(rp, 0, sizeof(*rp)); + + drv_get_ringparam(local, &rp->tx_pending, &rp->tx_max_pending, + &rp->rx_pending, &rp->rx_max_pending); +} + +static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { + "rx_packets", "rx_bytes", + "rx_duplicates", "rx_fragments", "rx_dropped", + "tx_packets", "tx_bytes", "tx_fragments", + "tx_filtered", "tx_retry_failed", "tx_retries", + "beacon_loss", "sta_state", "txrate", "rxrate", "signal", + "channel", "noise", "ch_time", "ch_time_busy", + "ch_time_ext_busy", "ch_time_rx", "ch_time_tx" +}; +#define STA_STATS_LEN ARRAY_SIZE(ieee80211_gstrings_sta_stats) + +static int ieee80211_get_sset_count(struct net_device *dev, int sset) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int rv = 0; + + if (sset == ETH_SS_STATS) + rv += STA_STATS_LEN; + + rv += drv_get_et_sset_count(sdata, sset); + + if (rv == 0) + return -EOPNOTSUPP; + return rv; +} + +static void ieee80211_get_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_channel *channel; + struct sta_info *sta; + struct ieee80211_local *local = sdata->local; + struct station_info sinfo; + struct survey_info survey; + int i, q; +#define STA_STATS_SURVEY_LEN 7 + + memset(data, 0, sizeof(u64) * STA_STATS_LEN); + +#define ADD_STA_STATS(sta) \ + do { \ + data[i++] += sta->rx_packets; \ + data[i++] += sta->rx_bytes; \ + data[i++] += sta->num_duplicates; \ + data[i++] += sta->rx_fragments; \ + data[i++] += sta->rx_dropped; \ + \ + data[i++] += sinfo.tx_packets; \ + data[i++] += sinfo.tx_bytes; \ + data[i++] += sta->tx_fragments; \ + data[i++] += sta->tx_filtered_count; \ + data[i++] += sta->tx_retry_failed; \ + data[i++] += sta->tx_retry_count; \ + data[i++] += sta->beacon_loss_count; \ + } while (0) + + /* For Managed stations, find the single station based on BSSID + * and use that. For interface types, iterate through all available + * stations and add stats for any station that is assigned to this + * network device. + */ + + mutex_lock(&local->sta_mtx); + + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + sta = sta_info_get_bss(sdata, sdata->u.mgd.bssid); + + if (!(sta && !WARN_ON(sta->sdata->dev != dev))) + goto do_survey; + + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); + + i = 0; + ADD_STA_STATS(sta); + + data[i++] = sta->sta_state; + + + if (sinfo.filled & STATION_INFO_TX_BITRATE) + data[i] = 100000 * + cfg80211_calculate_bitrate(&sinfo.txrate); + i++; + if (sinfo.filled & STATION_INFO_RX_BITRATE) + data[i] = 100000 * + cfg80211_calculate_bitrate(&sinfo.rxrate); + i++; + + if (sinfo.filled & STATION_INFO_SIGNAL_AVG) + data[i] = (u8)sinfo.signal_avg; + i++; + } else { + list_for_each_entry(sta, &local->sta_list, list) { + /* Make sure this station belongs to the proper dev */ + if (sta->sdata->dev != dev) + continue; + + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); + i = 0; + ADD_STA_STATS(sta); + } + } + +do_survey: + i = STA_STATS_LEN - STA_STATS_SURVEY_LEN; + /* Get survey stats for current channel */ + survey.filled = 0; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (chanctx_conf) + channel = chanctx_conf->def.chan; + else + channel = NULL; + rcu_read_unlock(); + + if (channel) { + q = 0; + do { + survey.filled = 0; + if (drv_get_survey(local, q, &survey) != 0) { + survey.filled = 0; + break; + } + q++; + } while (channel != survey.channel); + } + + if (survey.filled) + data[i++] = survey.channel->center_freq; + else + data[i++] = 0; + if (survey.filled & SURVEY_INFO_NOISE_DBM) + data[i++] = (u8)survey.noise; + else + data[i++] = -1LL; + if (survey.filled & SURVEY_INFO_CHANNEL_TIME) + data[i++] = survey.channel_time; + else + data[i++] = -1LL; + if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY) + data[i++] = survey.channel_time_busy; + else + data[i++] = -1LL; + if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) + data[i++] = survey.channel_time_ext_busy; + else + data[i++] = -1LL; + if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX) + data[i++] = survey.channel_time_rx; + else + data[i++] = -1LL; + if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX) + data[i++] = survey.channel_time_tx; + else + data[i++] = -1LL; + + mutex_unlock(&local->sta_mtx); + + if (WARN_ON(i != STA_STATS_LEN)) + return; + + drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN])); +} + +static void ieee80211_get_strings(struct net_device *dev, u32 sset, u8 *data) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int sz_sta_stats = 0; + + if (sset == ETH_SS_STATS) { + sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats); + memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats); + } + drv_get_et_strings(sdata, sset, &(data[sz_sta_stats])); +} + +static int ieee80211_get_regs_len(struct net_device *dev) +{ + return 0; +} + +static void ieee80211_get_regs(struct net_device *dev, + struct ethtool_regs *regs, + void *data) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + regs->version = wdev->wiphy->hw_version; + regs->len = 0; +} + +const struct ethtool_ops ieee80211_ethtool_ops = { + .get_drvinfo = cfg80211_get_drvinfo, + .get_regs_len = ieee80211_get_regs_len, + .get_regs = ieee80211_get_regs, + .get_link = ethtool_op_get_link, + .get_ringparam = ieee80211_get_ringparam, + .set_ringparam = ieee80211_set_ringparam, + .get_strings = ieee80211_get_strings, + .get_ethtool_stats = ieee80211_get_stats, + .get_sset_count = ieee80211_get_sset_count, +}; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index c150b68436d7..ff630be2ca75 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -31,6 +31,18 @@ static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa, } } +static void __check_htcap_enable(struct ieee80211_ht_cap *ht_capa, + struct ieee80211_ht_cap *ht_capa_mask, + struct ieee80211_sta_ht_cap *ht_cap, + u16 flag) +{ + __le16 le_flag = cpu_to_le16(flag); + + if ((ht_capa_mask->cap_info & le_flag) && + (ht_capa->cap_info & le_flag)) + ht_cap->cap |= flag; +} + void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap) { @@ -59,7 +71,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, smask = (u8 *)(&ht_capa_mask->mcs.rx_mask); /* NOTE: If you add more over-rides here, update register_hw - * ht_capa_mod_msk logic in main.c as well. + * ht_capa_mod_mask logic in main.c as well. * And, if this method can ever change ht_cap.ht_supported, fix * the check in ieee80211_add_ht_ie. */ @@ -86,6 +98,14 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); + /* Allow user to disable LDPC */ + __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap, + IEEE80211_HT_CAP_LDPC_CODING); + + /* Allow user to enable 40 MHz intolerant bit. */ + __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap, + IEEE80211_HT_CAP_40MHZ_INTOLERANT); + /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR) { @@ -130,13 +150,12 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, /* * If user has specified capability over-rides, take care - * of that if the station we're setting up is the AP that + * of that if the station we're setting up is the AP or TDLS peer that * we advertised a restricted capability set to. Override * our own capabilities and then use those below. */ - if ((sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) && - !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) ieee80211_apply_htcap_overrides(sdata, &own_cap); /* @@ -208,6 +227,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) ht_cap.mcs.rx_mask[32/8] |= 1; + /* set Rx highest rate */ + ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest; + apply: changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 06d28787945b..9713dc54ea4b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -143,7 +143,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, *pos++ = csa_settings->block_tx ? 1 : 0; *pos++ = ieee80211_frequency_to_channel( csa_settings->chandef.chan->center_freq); - sdata->csa_counter_offset_beacon = (pos - presp->head); + presp->csa_counter_offsets[0] = (pos - presp->head); *pos++ = csa_settings->count; } @@ -189,17 +189,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, chandef, 0); } - if (local->hw.queues >= IEEE80211_NUM_ACS) { - *pos++ = WLAN_EID_VENDOR_SPECIFIC; - *pos++ = 7; /* len */ - *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */ - *pos++ = 0x50; - *pos++ = 0xf2; - *pos++ = 2; /* WME */ - *pos++ = 0; /* WME info */ - *pos++ = 1; /* WME ver */ - *pos++ = 0; /* U-APSD no in use */ - } + if (local->hw.queues >= IEEE80211_NUM_ACS) + pos = ieee80211_add_wmm_info_ie(pos, 0); /* U-APSD not in use */ presp->head_len = pos - presp->head; if (WARN_ON(presp->head_len > frame_len)) @@ -228,7 +219,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; enum nl80211_bss_scan_width scan_width; bool have_higher_than_11mbit; - bool radar_required = false; + bool radar_required; int err; sdata_assert_lock(sdata); @@ -253,7 +244,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, presp = rcu_dereference_protected(ifibss->presp, lockdep_is_held(&sdata->wdev.mtx)); - rcu_assign_pointer(ifibss->presp, NULL); + RCU_INIT_POINTER(ifibss->presp, NULL); if (presp) kfree_rcu(presp, rcu_head); @@ -262,7 +253,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, /* make a copy of the chandef, it could be modified below. */ chandef = *req_chandef; chan = chandef.chan; - if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef, + NL80211_IFTYPE_ADHOC)) { if (chandef.width == NL80211_CHAN_WIDTH_5 || chandef.width == NL80211_CHAN_WIDTH_10 || chandef.width == NL80211_CHAN_WIDTH_20_NOHT || @@ -274,7 +266,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, chandef.width = NL80211_CHAN_WIDTH_20; chandef.center_freq1 = chan->center_freq; /* check again for downgraded chandef */ - if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef, + NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "Failed to join IBSS, beacons forbidden\n"); return; @@ -282,21 +275,20 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - &chandef); + &chandef, NL80211_IFTYPE_ADHOC); if (err < 0) { sdata_info(sdata, "Failed to join IBSS, invalid chandef\n"); return; } - if (err > 0) { - if (!ifibss->userspace_handles_dfs) { - sdata_info(sdata, - "Failed to join IBSS, DFS channel without control program\n"); - return; - } - radar_required = true; + if (err > 0 && !ifibss->userspace_handles_dfs) { + sdata_info(sdata, + "Failed to join IBSS, DFS channel without control program\n"); + return; } + radar_required = err; + mutex_lock(&local->mtx); if (ieee80211_vif_use_channel(sdata, &chandef, ifibss->fixed_channel ? @@ -775,7 +767,8 @@ static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) * unavailable. */ err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - &ifibss->chandef); + &ifibss->chandef, + NL80211_IFTYPE_ADHOC); if (err > 0) cfg80211_radar_event(sdata->local->hw.wiphy, &ifibss->chandef, GFP_ATOMIC); @@ -861,7 +854,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, goto disconnect; } - if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, ¶ms.chandef)) { + if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, ¶ms.chandef, + NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", ifibss->bssid, @@ -873,17 +867,17 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - ¶ms.chandef); + ¶ms.chandef, + NL80211_IFTYPE_ADHOC); if (err < 0) goto disconnect; - if (err) { + if (err > 0 && !ifibss->userspace_handles_dfs) { /* IBSS-DFS only allowed with a control program */ - if (!ifibss->userspace_handles_dfs) - goto disconnect; - - params.radar_required = true; + goto disconnect; } + params.radar_required = err; + if (cfg80211_chandef_identical(¶ms.chandef, &sdata->vif.bss_conf.chandef)) { ibss_dbg(sdata, @@ -1636,7 +1630,33 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, u32 changed = 0; u32 rate_flags; struct ieee80211_supported_band *sband; + enum ieee80211_chanctx_mode chanmode; + struct ieee80211_local *local = sdata->local; + int radar_detect_width = 0; int i; + int ret; + + ret = cfg80211_chandef_dfs_required(local->hw.wiphy, + ¶ms->chandef, + sdata->wdev.iftype); + if (ret < 0) + return ret; + + if (ret > 0) { + if (!params->userspace_handles_dfs) + return -EINVAL; + radar_detect_width = BIT(params->chandef.width); + } + + chanmode = (params->channel_fixed && !ret) ? + IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE; + + mutex_lock(&local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, ¶ms->chandef, chanmode, + radar_detect_width); + mutex_unlock(&local->chanctx_mtx); + if (ret < 0) + return ret; if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); @@ -1648,10 +1668,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.control_port = params->control_port; sdata->u.ibss.userspace_handles_dfs = params->userspace_handles_dfs; sdata->u.ibss.basic_rates = params->basic_rates; + sdata->u.ibss.last_scan_completed = jiffies; /* fix basic_rates if channel does not support these rates */ rate_flags = ieee80211_chandef_rate_flags(¶ms->chandef); - sband = sdata->local->hw.wiphy->bands[params->chandef.chan->band]; + sband = local->hw.wiphy->bands[params->chandef.chan->band]; for (i = 0; i < sband->n_bitrates; i++) { if ((rate_flags & sband->bitrates[i].flags) != rate_flags) sdata->u.ibss.basic_rates &= ~BIT(i); @@ -1700,9 +1721,9 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, changed); sdata->smps_mode = IEEE80211_SMPS_OFF; - sdata->needed_rx_chains = sdata->local->rx_chains; + sdata->needed_rx_chains = local->rx_chains; - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + ieee80211_queue_work(&local->hw, &sdata->work); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 222c28b75315..ef7a089ac546 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -229,16 +229,29 @@ struct ieee80211_rx_data { u16 tkip_iv16; }; +struct ieee80211_csa_settings { + const u16 *counter_offsets_beacon; + const u16 *counter_offsets_presp; + + int n_counter_offsets_beacon; + int n_counter_offsets_presp; + + u8 count; +}; + struct beacon_data { u8 *head, *tail; int head_len, tail_len; struct ieee80211_meshconf_ie *meshconf; + u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM]; + u8 csa_current_counter; struct rcu_head rcu_head; }; struct probe_resp { struct rcu_head rcu_head; int len; + u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM]; u8 data[0]; }; @@ -260,7 +273,7 @@ struct ieee80211_if_ap { /* to be used after channel switch. */ struct cfg80211_beacon_data *next_beacon; - struct list_head vlans; + struct list_head vlans; /* write-protected with RTNL and local->mtx */ struct ps_data ps; atomic_t num_mcast_sta; /* number of stations receiving multicast */ @@ -276,7 +289,7 @@ struct ieee80211_if_wds { }; struct ieee80211_if_vlan { - struct list_head list; + struct list_head list; /* write-protected with RTNL and local->mtx */ /* used for all tx if the VLAN is configured to 4-addr mode */ struct sta_info __rcu *sta; @@ -317,6 +330,7 @@ struct ieee80211_roc_work { bool started, abort, hw_begun, notified; bool to_be_freed; + bool on_channel; unsigned long hw_start_time; @@ -331,7 +345,6 @@ enum ieee80211_sta_flags { IEEE80211_STA_CONNECTION_POLL = BIT(1), IEEE80211_STA_CONTROL_PORT = BIT(2), IEEE80211_STA_DISABLE_HT = BIT(4), - IEEE80211_STA_CSA_RECEIVED = BIT(5), IEEE80211_STA_MFP_ENABLED = BIT(6), IEEE80211_STA_UAPSD_ENABLED = BIT(7), IEEE80211_STA_NULLFUNC_ACKED = BIT(8), @@ -489,6 +502,9 @@ struct ieee80211_if_managed { struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */ struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */ struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */ + + u8 tdls_peer[ETH_ALEN] __aligned(2); + struct delayed_work tdls_peer_del_work; }; struct ieee80211_if_ibss { @@ -687,12 +703,35 @@ enum ieee80211_chanctx_mode { IEEE80211_CHANCTX_EXCLUSIVE }; +/** + * enum ieee80211_chanctx_replace_state - channel context replacement state + * + * This is used for channel context in-place reservations that require channel + * context switch/swap. + * + * @IEEE80211_CHANCTX_REPLACE_NONE: no replacement is taking place + * @IEEE80211_CHANCTX_WILL_BE_REPLACED: this channel context will be replaced + * by a (not yet registered) channel context pointed by %replace_ctx. + * @IEEE80211_CHANCTX_REPLACES_OTHER: this (not yet registered) channel context + * replaces an existing channel context pointed to by %replace_ctx. + */ +enum ieee80211_chanctx_replace_state { + IEEE80211_CHANCTX_REPLACE_NONE, + IEEE80211_CHANCTX_WILL_BE_REPLACED, + IEEE80211_CHANCTX_REPLACES_OTHER, +}; + struct ieee80211_chanctx { struct list_head list; struct rcu_head rcu_head; + struct list_head assigned_vifs; + struct list_head reserved_vifs; + + enum ieee80211_chanctx_replace_state replace_state; + struct ieee80211_chanctx *replace_ctx; + enum ieee80211_chanctx_mode mode; - int refcount; bool driver_present; struct ieee80211_chanctx_conf conf; @@ -751,11 +790,18 @@ struct ieee80211_sub_if_data { struct mac80211_qos_map __rcu *qos_map; struct work_struct csa_finalize_work; - int csa_counter_offset_beacon; - int csa_counter_offset_presp; - bool csa_radar_required; + bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */ struct cfg80211_chan_def csa_chandef; + struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */ + struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */ + + /* context reservation -- protected with chanctx_mtx */ + struct ieee80211_chanctx *reserved_chanctx; + struct cfg80211_chan_def reserved_chandef; + bool reserved_radar_required; + bool reserved_ready; + /* used to reconfigure hardware SM PS */ struct work_struct recalc_smps; @@ -879,10 +925,17 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif) return shift; } +struct ieee80211_rx_agg { + u8 addr[ETH_ALEN]; + u16 tid; +}; + enum sdata_queue_type { IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, IEEE80211_SDATA_QUEUE_AGG_START = 1, IEEE80211_SDATA_QUEUE_AGG_STOP = 2, + IEEE80211_SDATA_QUEUE_RX_AGG_START = 3, + IEEE80211_SDATA_QUEUE_RX_AGG_STOP = 4, }; enum { @@ -899,6 +952,9 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_SKB_ADD, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, IEEE80211_QUEUE_STOP_REASON_FLUSH, + IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN, + + IEEE80211_QUEUE_STOP_REASONS, }; #ifdef CONFIG_MAC80211_LEDS @@ -995,6 +1051,7 @@ struct ieee80211_local { struct workqueue_struct *workqueue; unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; + int q_stop_reasons[IEEE80211_MAX_QUEUES][IEEE80211_QUEUE_STOP_REASONS]; /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */ spinlock_t queue_stop_reason_lock; @@ -1122,7 +1179,8 @@ struct ieee80211_local { unsigned long scanning; struct cfg80211_ssid scan_ssid; struct cfg80211_scan_request *int_scan_req; - struct cfg80211_scan_request *scan_req, *hw_scan_req; + struct cfg80211_scan_request *scan_req; + struct ieee80211_scan_request *hw_scan_req; struct cfg80211_chan_def scan_chandef; enum ieee80211_band hw_scan_band; int scan_channel_idx; @@ -1448,6 +1506,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); +void ieee80211_sched_scan_end(struct ieee80211_local *local); void ieee80211_sched_scan_stopped_work(struct work_struct *work); /* off-channel helpers */ @@ -1525,6 +1584,10 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); +void __ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, @@ -1677,6 +1740,21 @@ static inline void ieee802_11_parse_elems(const u8 *start, size_t len, ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); } +static inline bool ieee80211_rx_reorder_ready(struct sk_buff_head *frames) +{ + struct sk_buff *tail = skb_peek_tail(frames); + struct ieee80211_rx_status *status; + + if (!tail) + return false; + + status = IEEE80211_SKB_RXCB(tail); + if (status->flag & RX_FLAG_AMSDU_MORE) + return false; + + return true; +} + void ieee80211_dynamic_ps_enable_work(struct work_struct *work); void ieee80211_dynamic_ps_disable_work(struct work_struct *work); void ieee80211_dynamic_ps_timer(unsigned long data); @@ -1690,14 +1768,24 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, unsigned long queues, - enum queue_stop_reason reason); + enum queue_stop_reason reason, + bool refcounted); +void ieee80211_stop_vif_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum queue_stop_reason reason); +void ieee80211_wake_vif_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum queue_stop_reason reason); void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, unsigned long queues, - enum queue_stop_reason reason); + enum queue_stop_reason reason, + bool refcounted); void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason); + enum queue_stop_reason reason, + bool refcounted); void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason); + enum queue_stop_reason reason, + bool refcounted); void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue); void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); @@ -1715,8 +1803,10 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *bssid, u16 stype, u16 reason, bool send_frame, u8 *frame_buf); int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, - size_t buffer_len, const u8 *ie, size_t ie_len, - enum ieee80211_band band, u32 rate_mask, + size_t buffer_len, + struct ieee80211_scan_ies *ie_desc, + const u8 *ie, size_t ie_len, + u8 bands_used, u32 *rate_masks, struct cfg80211_chan_def *chandef); struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u32 ratemask, @@ -1759,6 +1849,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic, enum ieee80211_band band); +u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); /* channel management */ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, @@ -1771,17 +1862,25 @@ ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode mode); int __must_check +ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode mode, + bool radar_required); +int __must_check +ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata); +int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata); +int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local); + +int __must_check ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, u32 *changed); -/* NOTE: only use ieee80211_vif_change_channel() for channel switch */ -int __must_check -ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, - u32 *changed); void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, bool clear); +int ieee80211_chanctx_refcount(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx); void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); @@ -1805,6 +1904,23 @@ int ieee80211_cs_headroom(struct ieee80211_local *local, enum nl80211_iftype iftype); void ieee80211_recalc_dtim(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); +int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode chanmode, + u8 radar_detect); +int ieee80211_max_num_channels(struct ieee80211_local *local); + +/* TDLS */ +int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, u32 peer_capability, + bool initiator, const u8 *extra_ies, + size_t extra_ies_len); +int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, enum nl80211_tdls_operation oper); + + +extern const struct ethtool_ops ieee80211_ethtool_ops; #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline @@ -1813,3 +1929,4 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, #endif #endif /* IEEE80211_I_H */ +void ieee80211_tdls_peer_del_work(struct work_struct *wk); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b8d331e7d883..01eede7406a5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -250,6 +250,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *nsdata; + int ret; ASSERT_RTNL(); @@ -300,7 +301,10 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, } } - return 0; + mutex_lock(&local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&local->chanctx_mtx); + return ret; } static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, @@ -395,6 +399,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) sdata->vif.type = NL80211_IFTYPE_MONITOR; snprintf(sdata->name, IFNAMSIZ, "%s-monitor", wiphy_name(local->hw.wiphy)); + sdata->wdev.iftype = NL80211_IFTYPE_MONITOR; sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; @@ -423,7 +428,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (ret) { mutex_lock(&local->iflist_mtx); - rcu_assign_pointer(local->monitor_sdata, NULL); + RCU_INIT_POINTER(local->monitor_sdata, NULL); mutex_unlock(&local->iflist_mtx); synchronize_net(); drv_remove_interface(local, sdata); @@ -452,7 +457,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) return; } - rcu_assign_pointer(local->monitor_sdata, NULL); + RCU_INIT_POINTER(local->monitor_sdata, NULL); mutex_unlock(&local->iflist_mtx); synchronize_net(); @@ -492,7 +497,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (!sdata->bss) return -ENOLINK; + mutex_lock(&local->mtx); list_add(&sdata->u.vlan.list, &sdata->bss->vlans); + mutex_unlock(&local->mtx); master = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); @@ -722,8 +729,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) drv_stop(local); err_del_bss: sdata->bss = NULL; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); + mutex_unlock(&local->mtx); + } /* might already be clear but that doesn't matter */ clear_bit(SDATA_STATE_RUNNING, &sdata->state); return res; @@ -829,8 +839,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->recalc_smps); sdata_lock(sdata); + mutex_lock(&local->mtx); sdata->vif.csa_active = false; + if (sdata->csa_block_tx) { + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_block_tx = false; + } + mutex_unlock(&local->mtx); sdata_unlock(sdata); + cancel_work_sync(&sdata->csa_finalize_work); cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); @@ -875,8 +893,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: + mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); - rcu_assign_pointer(sdata->vif.chanctx_conf, NULL); + mutex_unlock(&local->mtx); + RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL); /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: @@ -895,7 +915,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ - rcu_assign_pointer(local->p2p_sdata, NULL); + RCU_INIT_POINTER(local->p2p_sdata, NULL); /* fall through */ default: cancel_work_sync(&sdata->work); @@ -1120,6 +1140,7 @@ static void ieee80211_iface_work(struct work_struct *work) struct sk_buff *skb; struct sta_info *sta; struct ieee80211_ra_tid *ra_tid; + struct ieee80211_rx_agg *rx_agg; if (!ieee80211_sdata_running(sdata)) return; @@ -1147,6 +1168,34 @@ static void ieee80211_iface_work(struct work_struct *work) ra_tid = (void *)&skb->cb; ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra, ra_tid->tid); + } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) { + rx_agg = (void *)&skb->cb; + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, rx_agg->addr); + if (sta) { + u16 last_seq; + + last_seq = le16_to_cpu( + sta->last_seq_ctrl[rx_agg->tid]); + + __ieee80211_start_rx_ba_session(sta, + 0, 0, + ieee80211_sn_inc(last_seq), + 1, rx_agg->tid, + IEEE80211_MAX_AMPDU_BUF, + false); + } + mutex_unlock(&local->sta_mtx); + } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_STOP) { + rx_agg = (void *)&skb->cb; + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, rx_agg->addr); + if (sta) + __ieee80211_stop_rx_ba_session(sta, + rx_agg->tid, + WLAN_BACK_RECIPIENT, 0, + false); + mutex_unlock(&local->sta_mtx); } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_BACK) { int len = skb->len; @@ -1267,6 +1316,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); sdata->control_port_no_encrypt = false; sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; + sdata->vif.bss_conf.idle = true; sdata->noack_map = 0; @@ -1280,6 +1330,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, INIT_WORK(&sdata->work, ieee80211_iface_work); INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work); + INIT_LIST_HEAD(&sdata->assigned_chanctx_list); + INIT_LIST_HEAD(&sdata->reserved_chanctx_list); switch (type) { case NL80211_IFTYPE_P2P_GO: @@ -1601,9 +1653,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (local->hw.queues >= IEEE80211_NUM_ACS) txqs = IEEE80211_NUM_ACS; - ndev = alloc_netdev_mqs(sizeof(*sdata) + - local->hw.vif_data_size, - name, ieee80211_if_setup, txqs, 1); + ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size, + name, NET_NAME_UNKNOWN, + ieee80211_if_setup, txqs, 1); if (!ndev) return -ENOMEM; dev_net_set(ndev, wiphy_net(local->hw.wiphy)); @@ -1683,6 +1735,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ndev->features |= local->hw.netdev_features; + netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops); + ret = register_netdevice(ndev); if (ret) { free_netdev(ndev); @@ -1758,7 +1812,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); unregister_netdevice_many(&unreg_list); - list_del(&unreg_list); list_for_each_entry_safe(sdata, tmp, &wdev_list, list) { list_del(&sdata->list); @@ -1774,20 +1827,19 @@ static int netdev_notify(struct notifier_block *nb, struct ieee80211_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) - return 0; + return NOTIFY_DONE; if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy) - return 0; + return NOTIFY_DONE; if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) - return 0; + return NOTIFY_DONE; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - memcpy(sdata->name, dev->name, IFNAMSIZ); - ieee80211_debugfs_rename_netdev(sdata); - return 0; + + return NOTIFY_OK; } static struct notifier_block mac80211_netdev_notifier = { diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 6ff65a1ebaa9..d808cff80153 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -325,7 +325,8 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, struct ieee80211_key *key; int i, j, err; - BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS); + if (WARN_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)) + return ERR_PTR(-EINVAL); key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); if (!key) @@ -481,9 +482,6 @@ int ieee80211_key_link(struct ieee80211_key *key, int idx, ret; bool pairwise; - BUG_ON(!sdata); - BUG_ON(!key); - pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; idx = key->conf.keyidx; key->local = sdata->local; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4c1bf61bc778..e0ab4320a078 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -272,7 +272,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) /* use this reason, ieee80211_reconfig will unblock it */ ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + IEEE80211_QUEUE_STOP_REASON_SUSPEND, + false); /* * Stop all Rx during the reconfig. We don't want state changes @@ -340,7 +341,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, sdata_unlock(sdata); - return NOTIFY_DONE; + return NOTIFY_OK; } #endif @@ -371,7 +372,7 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb, drv_ipv6_addr_change(local, sdata, idev); - return NOTIFY_DONE; + return NOTIFY_OK; } #endif @@ -446,7 +447,9 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_MAX_AMSDU | IEEE80211_HT_CAP_SGI_20 | - IEEE80211_HT_CAP_SGI_40), + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_40MHZ_INTOLERANT), .mcs = { .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, @@ -954,6 +957,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; + local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; @@ -1183,18 +1188,12 @@ static int __init ieee80211_init(void) if (ret) goto err_minstrel; - ret = rc80211_pid_init(); - if (ret) - goto err_pid; - ret = ieee80211_iface_init(); if (ret) goto err_netdev; return 0; err_netdev: - rc80211_pid_exit(); - err_pid: rc80211_minstrel_ht_exit(); err_minstrel: rc80211_minstrel_exit(); @@ -1204,7 +1203,6 @@ static int __init ieee80211_init(void) static void __exit ieee80211_exit(void) { - rc80211_pid_exit(); rc80211_minstrel_ht_exit(); rc80211_minstrel_exit(); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index f70e9cd10552..e9f99c1e3fad 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -366,20 +366,15 @@ int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) return 0; /* find RSN IE */ - data = ifmsh->ie; - while (data < ifmsh->ie + ifmsh->ie_len) { - if (*data == WLAN_EID_RSN) { - len = data[1] + 2; - break; - } - data++; - } + data = cfg80211_find_ie(WLAN_EID_RSN, ifmsh->ie, ifmsh->ie_len); + if (!data) + return 0; - if (len) { - if (skb_tailroom(skb) < len) - return -ENOMEM; - memcpy(skb_put(skb, len), data, len); - } + len = data[1] + 2; + + if (skb_tailroom(skb) < len) + return -ENOMEM; + memcpy(skb_put(skb, len), data, len); return 0; } @@ -684,7 +679,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) *pos++ = 0x0; *pos++ = ieee80211_frequency_to_channel( csa->settings.chandef.chan->center_freq); - sdata->csa_counter_offset_beacon = hdr_len + 6; + bcn->csa_counter_offsets[0] = hdr_len + 6; *pos++ = csa->settings.count; *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; *pos++ = 6; @@ -829,7 +824,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); bcn = rcu_dereference_protected(ifmsh->beacon, lockdep_is_held(&sdata->wdev.mtx)); - rcu_assign_pointer(ifmsh->beacon, NULL); + RCU_INIT_POINTER(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); /* flush STAs and mpaths on this iface */ @@ -903,14 +898,15 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - ¶ms.chandef); + ¶ms.chandef, + NL80211_IFTYPE_MESH_POINT); if (err < 0) return false; - if (err) { - params.radar_required = true; + if (err > 0) /* TODO: DFS not (yet) supported */ return false; - } + + params.radar_required = err; if (cfg80211_chandef_identical(¶ms.chandef, &sdata->vif.bss_conf.chandef)) { @@ -1068,7 +1064,7 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) /* Remove the CSA and MCSP elements from the beacon */ tmp_csa_settings = rcu_dereference(ifmsh->csa); - rcu_assign_pointer(ifmsh->csa, NULL); + RCU_INIT_POINTER(ifmsh->csa, NULL); if (tmp_csa_settings) kfree_rcu(tmp_csa_settings, rcu_head); ret = ieee80211_mesh_rebuild_beacon(sdata); @@ -1102,7 +1098,7 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, ret = ieee80211_mesh_rebuild_beacon(sdata); if (ret) { tmp_csa_settings = rcu_dereference(ifmsh->csa); - rcu_assign_pointer(ifmsh->csa, NULL); + RCU_INIT_POINTER(ifmsh->csa, NULL); kfree_rcu(tmp_csa_settings, rcu_head); return ret; } @@ -1126,7 +1122,7 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, mgmt_fwd = (struct ieee80211_mgmt *) skb_put(skb, len); /* offset_ttl is based on whether the secondary channel - * offset is available or not. Substract 1 from the mesh TTL + * offset is available or not. Subtract 1 from the mesh TTL * and disable the initiator flag before forwarding. */ offset_ttl = (len < 42) ? 7 : 10; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index f9514685d45a..214e63b84e5c 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -37,7 +37,7 @@ static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae) return get_unaligned_le32(preq_elem + offset); } -static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae) +static inline u16 u16_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; @@ -157,7 +157,6 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, default: kfree_skb(skb); return -ENOTSUPP; - break; } *pos++ = ie_len; *pos++ = flags; @@ -544,9 +543,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || time_before(jiffies, ifmsh->last_sn_update)) { - target_sn = ++ifmsh->sn; + ++ifmsh->sn; ifmsh->last_sn_update = jiffies; } + target_sn = ifmsh->sn; } else if (is_broadcast_ether_addr(target_addr) && (target_flags & IEEE80211_PREQ_TO_FLAG)) { rcu_read_lock(); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 7d050ed6fe5a..cf032a8db9d7 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -287,8 +287,10 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct sk_buff_head failq; unsigned long flags; - BUG_ON(gate_mpath == from_mpath); - BUG_ON(!gate_mpath->next_hop); + if (WARN_ON(gate_mpath == from_mpath)) + return; + if (WARN_ON(!gate_mpath->next_hop)) + return; __skb_queue_head_init(&failq); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index e8f60aa2e848..63b874101b27 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -551,11 +551,30 @@ static void mesh_plink_timer(unsigned long data) return; spin_lock_bh(&sta->lock); - if (sta->ignore_plink_timer) { - sta->ignore_plink_timer = false; + + /* If a timer fires just before a state transition on another CPU, + * we may have already extended the timeout and changed state by the + * time we've acquired the lock and arrived here. In that case, + * skip this timer and wait for the new one. + */ + if (time_before(jiffies, sta->plink_timer.expires)) { + mpl_dbg(sta->sdata, + "Ignoring timer for %pM in state %s (timer adjusted)", + sta->sta.addr, mplstates[sta->plink_state]); spin_unlock_bh(&sta->lock); return; } + + /* del_timer() and handler may race when entering these states */ + if (sta->plink_state == NL80211_PLINK_LISTEN || + sta->plink_state == NL80211_PLINK_ESTAB) { + mpl_dbg(sta->sdata, + "Ignoring timer for %pM in state %s (timer deleted)", + sta->sta.addr, mplstates[sta->plink_state]); + spin_unlock_bh(&sta->lock); + return; + } + mpl_dbg(sta->sdata, "Mesh plink timer for %pM fired on state %s\n", sta->sta.addr, mplstates[sta->plink_state]); @@ -773,9 +792,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, break; case CNF_ACPT: sta->plink_state = NL80211_PLINK_CNF_RCVD; - if (!mod_plink_timer(sta, - mshcfg->dot11MeshConfirmTimeout)) - sta->ignore_plink_timer = true; + mod_plink_timer(sta, mshcfg->dot11MeshConfirmTimeout); break; default: break; @@ -834,8 +851,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, case NL80211_PLINK_HOLDING: switch (event) { case CLS_ACPT: - if (del_timer(&sta->plink_timer)) - sta->ignore_plink_timer = 1; + del_timer(&sta->plink_timer); mesh_plink_fsm_restart(sta); break; case OPN_ACPT: diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 2bc5dc25d5ad..09625d6205c3 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -171,7 +171,7 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata, u8 cap; WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); - BUG_ON(!rcu_read_lock_held()); + WARN_ON(!rcu_read_lock_held()); cap = beacon->meshconf->meshconf_cap; spin_lock_bh(&ifmsh->sync_offset_lock); diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h index 3b848dad9587..0e4886f881f1 100644 --- a/net/mac80211/michael.h +++ b/net/mac80211/michael.h @@ -11,6 +11,7 @@ #define MICHAEL_H #include <linux/types.h> +#include <linux/ieee80211.h> #define MICHAEL_MIC_LEN 8 diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dee50aefd6e8..31a8afaf7332 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -830,16 +830,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) qos_info = 0; } - pos = skb_put(skb, 9); - *pos++ = WLAN_EID_VENDOR_SPECIFIC; - *pos++ = 7; /* len */ - *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */ - *pos++ = 0x50; - *pos++ = 0xf2; - *pos++ = 2; /* WME */ - *pos++ = 0; /* WME info */ - *pos++ = 1; /* WME ver */ - *pos++ = qos_info; + pos = ieee80211_add_wmm_info_ie(skb_put(skb, 9), qos_info); } /* add any remaining custom (i.e. vendor specific here) IEs */ @@ -940,51 +931,77 @@ static void ieee80211_chswitch_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u32 changed = 0; int ret; if (!ieee80211_sdata_running(sdata)) return; sdata_lock(sdata); + mutex_lock(&local->mtx); + mutex_lock(&local->chanctx_mtx); + if (!ifmgd->associated) goto out; - mutex_lock(&local->mtx); - ret = ieee80211_vif_change_channel(sdata, &changed); - mutex_unlock(&local->mtx); - if (ret) { + if (!sdata->vif.csa_active) + goto out; + + /* + * using reservation isn't immediate as it may be deferred until later + * with multi-vif. once reservation is complete it will re-schedule the + * work with no reserved_chanctx so verify chandef to check if it + * completed successfully + */ + + if (sdata->reserved_chanctx) { + /* + * with multi-vif csa driver may call ieee80211_csa_finish() + * many times while waiting for other interfaces to use their + * reservations + */ + if (sdata->reserved_ready) + goto out; + + ret = ieee80211_vif_use_reserved_context(sdata); + if (ret) { + sdata_info(sdata, + "failed to use reserved channel context, disconnecting (err=%d)\n", + ret); + ieee80211_queue_work(&sdata->local->hw, + &ifmgd->csa_connection_drop_work); + goto out; + } + + goto out; + } + + if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef, + &sdata->csa_chandef)) { sdata_info(sdata, - "vif channel switch failed, disconnecting\n"); + "failed to finalize channel switch, disconnecting\n"); ieee80211_queue_work(&sdata->local->hw, &ifmgd->csa_connection_drop_work); goto out; } - if (!local->use_chanctx) { - local->_oper_chandef = sdata->csa_chandef; - /* Call "hw_config" only if doing sw channel switch. - * Otherwise update the channel directly - */ - if (!local->ops->channel_switch) - ieee80211_hw_config(local, 0); - else - local->hw.conf.chandef = local->_oper_chandef; - } - /* XXX: shouldn't really modify cfg80211-owned data! */ ifmgd->associated->channel = sdata->csa_chandef.chan; + sdata->vif.csa_active = false; + /* XXX: wait for a beacon first? */ - ieee80211_wake_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); + if (sdata->csa_block_tx) { + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_block_tx = false; + } - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_sta_reset_beacon_monitor(sdata); + ieee80211_sta_reset_conn_monitor(sdata); - out: - sdata->vif.csa_active = false; - ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; +out: + mutex_unlock(&local->chanctx_mtx); + mutex_unlock(&local->mtx); sdata_unlock(sdata); } @@ -1021,6 +1038,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_bss *cbss = ifmgd->associated; + struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; enum ieee80211_band current_band; struct ieee80211_csa_ie csa_ie; @@ -1035,7 +1053,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; /* disregard subsequent announcements if we are already processing */ - if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) + if (sdata->vif.csa_active) return; current_band = cbss->channel->band; @@ -1062,9 +1080,22 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } - ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; - + mutex_lock(&local->mtx); mutex_lock(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (!conf) { + sdata_info(sdata, + "no channel context assigned to vif?, disconnecting\n"); + ieee80211_queue_work(&local->hw, + &ifmgd->csa_connection_drop_work); + mutex_unlock(&local->chanctx_mtx); + mutex_unlock(&local->mtx); + return; + } + + chanctx = container_of(conf, struct ieee80211_chanctx, conf); + if (local->use_chanctx) { u32 num_chanctx = 0; list_for_each_entry(chanctx, &local->chanctx_list, list) @@ -1077,35 +1108,33 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); mutex_unlock(&local->chanctx_mtx); + mutex_unlock(&local->mtx); return; } } - if (WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) { - ieee80211_queue_work(&local->hw, - &ifmgd->csa_connection_drop_work); - mutex_unlock(&local->chanctx_mtx); - return; - } - chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf), - struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { + res = ieee80211_vif_reserve_chanctx(sdata, &csa_ie.chandef, + chanctx->mode, false); + if (res) { sdata_info(sdata, - "channel switch with multiple interfaces on the same channel, disconnecting\n"); + "failed to reserve channel context for channel switch, disconnecting (err=%d)\n", + res); ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); mutex_unlock(&local->chanctx_mtx); + mutex_unlock(&local->mtx); return; } mutex_unlock(&local->chanctx_mtx); - sdata->csa_chandef = csa_ie.chandef; sdata->vif.csa_active = true; + sdata->csa_chandef = csa_ie.chandef; + sdata->csa_block_tx = csa_ie.mode; - if (csa_ie.mode) - ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); + if (sdata->csa_block_tx) + ieee80211_stop_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + mutex_unlock(&local->mtx); if (local->ops->channel_switch) { /* use driver's channel switch callback */ @@ -1374,7 +1403,8 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work) ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_PS); + IEEE80211_QUEUE_STOP_REASON_PS, + false); } void ieee80211_dynamic_ps_enable_work(struct work_struct *work) @@ -1817,6 +1847,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags = 0; mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + + sdata->vif.csa_active = false; + if (sdata->csa_block_tx) { + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_block_tx = false; + } mutex_unlock(&local->mtx); sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; @@ -2045,6 +2082,7 @@ EXPORT_SYMBOL(ieee80211_ap_probereq_get); static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -2057,11 +2095,14 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, true, frame_buf); - ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; + mutex_lock(&local->mtx); sdata->vif.csa_active = false; - ieee80211_wake_queues_by_reason(&sdata->local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); + if (sdata->csa_block_tx) { + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_block_tx = false; + } + mutex_unlock(&local->mtx); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); @@ -3546,6 +3587,9 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) if (local->quiescing) return; + if (sdata->vif.csa_active) + return; + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_connection_loss_work); @@ -3561,6 +3605,9 @@ static void ieee80211_sta_conn_mon_timer(unsigned long data) if (local->quiescing) return; + if (sdata->vif.csa_active) + return; + ieee80211_queue_work(&local->hw, &ifmgd->monitor_work); } @@ -3598,18 +3645,24 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) sdata_lock(sdata); - if (ifmgd->auth_data) { + if (ifmgd->auth_data || ifmgd->assoc_data) { + const u8 *bssid = ifmgd->auth_data ? + ifmgd->auth_data->bss->bssid : + ifmgd->assoc_data->bss->bssid; + /* - * If we are trying to authenticate while suspending, cfg80211 - * won't know and won't actually abort those attempts, thus we - * need to do that ourselves. + * If we are trying to authenticate / associate while suspending, + * cfg80211 won't know and won't actually abort those attempts, + * thus we need to do that ourselves. */ - ieee80211_send_deauth_disassoc(sdata, - ifmgd->auth_data->bss->bssid, + ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); - ieee80211_destroy_auth_data(sdata, false); + if (ifmgd->assoc_data) + ieee80211_destroy_assoc_data(sdata, false); + if (ifmgd->auth_data) + ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); } @@ -3654,6 +3707,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->csa_connection_drop_work, ieee80211_csa_connection_drop_work); INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work); + INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work, + ieee80211_tdls_peer_del_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, @@ -3701,7 +3756,7 @@ int ieee80211_max_network_latency(struct notifier_block *nb, ieee80211_recalc_ps(local, latency_usec); mutex_unlock(&local->iflist_mtx); - return 0; + return NOTIFY_OK; } static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, @@ -4517,6 +4572,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->request_smps_work); cancel_work_sync(&ifmgd->csa_connection_drop_work); cancel_work_sync(&ifmgd->chswitch_work); + cancel_delayed_work_sync(&ifmgd->tdls_peer_del_work); sdata_lock(sdata); if (ifmgd->assoc_data) { diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 6fb38558a5e6..ff20b2ebdb30 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -119,7 +119,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) * before sending nullfunc to enable powersave at the AP. */ ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, + false); ieee80211_flush_queues(local, NULL); mutex_lock(&local->iflist_mtx); @@ -182,7 +183,8 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) mutex_unlock(&local->iflist_mtx); ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); + IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, + false); } void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) @@ -333,7 +335,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) container_of(work, struct ieee80211_roc_work, work.work); struct ieee80211_sub_if_data *sdata = roc->sdata; struct ieee80211_local *local = sdata->local; - bool started; + bool started, on_channel; mutex_lock(&local->mtx); @@ -354,14 +356,26 @@ void ieee80211_sw_roc_work(struct work_struct *work) if (!roc->started) { struct ieee80211_roc_work *dep; - /* start this ROC */ - ieee80211_offchannel_stop_vifs(local); + WARN_ON(local->use_chanctx); + + /* If actually operating on the desired channel (with at least + * 20 MHz channel width) don't stop all the operations but still + * treat it as though the ROC operation started properly, so + * other ROC operations won't interfere with this one. + */ + roc->on_channel = roc->chan == local->_oper_chandef.chan && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_10; - /* switch channel etc */ + /* start this ROC */ ieee80211_recalc_idle(local); - local->tmp_channel = roc->chan; - ieee80211_hw_config(local, 0); + if (!roc->on_channel) { + ieee80211_offchannel_stop_vifs(local); + + local->tmp_channel = roc->chan; + ieee80211_hw_config(local, 0); + } /* tell userspace or send frame */ ieee80211_handle_roc_started(roc); @@ -380,9 +394,10 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; + on_channel = roc->on_channel; ieee80211_roc_notify_destroy(roc, !roc->abort); - if (started) { + if (started && !on_channel) { ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index d478b880a0af..4c5192e0d66c 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -35,7 +35,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + IEEE80211_QUEUE_STOP_REASON_SUSPEND, + false); /* flush out all packets */ synchronize_net(); @@ -74,7 +75,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + IEEE80211_QUEUE_STOP_REASON_SUSPEND, + false); return err; } else if (err > 0) { WARN_ON(err != 1); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 9aa2a1190a86..18babe302832 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -143,19 +143,6 @@ void rate_control_deinitialize(struct ieee80211_local *local); /* Rate control algorithms */ -#ifdef CONFIG_MAC80211_RC_PID -int rc80211_pid_init(void); -void rc80211_pid_exit(void); -#else -static inline int rc80211_pid_init(void) -{ - return 0; -} -static inline void rc80211_pid_exit(void) -{ -} -#endif - #ifdef CONFIG_MAC80211_RC_MINSTREL int rc80211_minstrel_init(void); void rc80211_minstrel_exit(void); diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 26fd94fa0aed..1c1469c36dca 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -657,6 +657,17 @@ minstrel_free(void *priv) kfree(priv); } +static u32 minstrel_get_expected_throughput(void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + int idx = mi->max_tp_rate[0]; + + /* convert pkt per sec in kbps (1200 is the average pkt size used for + * computing cur_tp + */ + return MINSTREL_TRUNC(mi->r[idx].cur_tp) * 1200 * 8 / 1024; +} + const struct rate_control_ops mac80211_minstrel = { .name = "minstrel", .tx_status = minstrel_tx_status, @@ -670,6 +681,7 @@ const struct rate_control_ops mac80211_minstrel = { .add_sta_debugfs = minstrel_add_sta_debugfs, .remove_sta_debugfs = minstrel_remove_sta_debugfs, #endif + .get_expected_throughput = minstrel_get_expected_throughput, }; int __init diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index bccaf854a309..85c1e74b7714 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -22,7 +22,7 @@ #define MCS_NBITS (AVG_PKT_SIZE << 3) /* Number of symbols for a packet with (bps) bits per symbol */ -#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) +#define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps)) /* Transmission time (nanoseconds) for a packet containing (syms) symbols */ #define MCS_SYMBOL_TIME(sgi, syms) \ @@ -226,8 +226,9 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - tp = 1000000 * ((prob * 1000) / nsecs); + /* prob is scaled - see MINSTREL_FRAC above */ + tp = 1000000 * ((prob * 1000) / nsecs); mr->cur_tp = MINSTREL_TRUNC(tp); } @@ -1031,6 +1032,22 @@ minstrel_ht_free(void *priv) mac80211_minstrel.free(priv); } +static u32 minstrel_ht_get_expected_throughput(void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + int i, j; + + if (!msp->is_ht) + return mac80211_minstrel.get_expected_throughput(priv_sta); + + i = mi->max_tp_rate / MCS_GROUP_RATES; + j = mi->max_tp_rate % MCS_GROUP_RATES; + + /* convert cur_tp from pkt per second in kbps */ + return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024; +} + static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", .tx_status = minstrel_ht_tx_status, @@ -1045,6 +1062,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = { .add_sta_debugfs = minstrel_ht_add_sta_debugfs, .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs, #endif + .get_expected_throughput = minstrel_ht_get_expected_throughput, }; diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h deleted file mode 100644 index 19111c7bf454..000000000000 --- a/net/mac80211/rc80211_pid.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de> - * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef RC80211_PID_H -#define RC80211_PID_H - -/* Sampling period for measuring percentage of failed frames in ms. */ -#define RC_PID_INTERVAL 125 - -/* Exponential averaging smoothness (used for I part of PID controller) */ -#define RC_PID_SMOOTHING_SHIFT 3 -#define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT) - -/* Sharpening factor (used for D part of PID controller) */ -#define RC_PID_SHARPENING_FACTOR 0 -#define RC_PID_SHARPENING_DURATION 0 - -/* Fixed point arithmetic shifting amount. */ -#define RC_PID_ARITH_SHIFT 8 - -/* Proportional PID component coefficient. */ -#define RC_PID_COEFF_P 15 -/* Integral PID component coefficient. */ -#define RC_PID_COEFF_I 9 -/* Derivative PID component coefficient. */ -#define RC_PID_COEFF_D 15 - -/* Target failed frames rate for the PID controller. NB: This effectively gives - * maximum failed frames percentage we're willing to accept. If the wireless - * link quality is good, the controller will fail to adjust failed frames - * percentage to the target. This is intentional. - */ -#define RC_PID_TARGET_PF 14 - -/* Rate behaviour normalization quantity over time. */ -#define RC_PID_NORM_OFFSET 3 - -/* Push high rates right after loading. */ -#define RC_PID_FAST_START 0 - -/* Arithmetic right shift for positive and negative values for ISO C. */ -#define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \ - ((x) < 0 ? -((-(x)) >> (y)) : (x) >> (y)) - -enum rc_pid_event_type { - RC_PID_EVENT_TYPE_TX_STATUS, - RC_PID_EVENT_TYPE_RATE_CHANGE, - RC_PID_EVENT_TYPE_TX_RATE, - RC_PID_EVENT_TYPE_PF_SAMPLE, -}; - -union rc_pid_event_data { - /* RC_PID_EVENT_TX_STATUS */ - struct { - u32 flags; - struct ieee80211_tx_info tx_status; - }; - /* RC_PID_EVENT_TYPE_RATE_CHANGE */ - /* RC_PID_EVENT_TYPE_TX_RATE */ - struct { - int index; - int rate; - }; - /* RC_PID_EVENT_TYPE_PF_SAMPLE */ - struct { - s32 pf_sample; - s32 prop_err; - s32 int_err; - s32 der_err; - }; -}; - -struct rc_pid_event { - /* The time when the event occurred */ - unsigned long timestamp; - - /* Event ID number */ - unsigned int id; - - /* Type of event */ - enum rc_pid_event_type type; - - /* type specific data */ - union rc_pid_event_data data; -}; - -/* Size of the event ring buffer. */ -#define RC_PID_EVENT_RING_SIZE 32 - -struct rc_pid_event_buffer { - /* Counter that generates event IDs */ - unsigned int ev_count; - - /* Ring buffer of events */ - struct rc_pid_event ring[RC_PID_EVENT_RING_SIZE]; - - /* Index to the entry in events_buf to be reused */ - unsigned int next_entry; - - /* Lock that guards against concurrent access to this buffer struct */ - spinlock_t lock; - - /* Wait queue for poll/select and blocking I/O */ - wait_queue_head_t waitqueue; -}; - -struct rc_pid_events_file_info { - /* The event buffer we read */ - struct rc_pid_event_buffer *events; - - /* The entry we have should read next */ - unsigned int next_entry; -}; - -/** - * struct rc_pid_debugfs_entries - tunable parameters - * - * Algorithm parameters, tunable via debugfs. - * @target: target percentage for failed frames - * @sampling_period: error sampling interval in milliseconds - * @coeff_p: absolute value of the proportional coefficient - * @coeff_i: absolute value of the integral coefficient - * @coeff_d: absolute value of the derivative coefficient - * @smoothing_shift: absolute value of the integral smoothing factor (i.e. - * amount of smoothing introduced by the exponential moving average) - * @sharpen_factor: absolute value of the derivative sharpening factor (i.e. - * amount of emphasis given to the derivative term after low activity - * events) - * @sharpen_duration: duration of the sharpening effect after the detected low - * activity event, relative to sampling_period - * @norm_offset: amount of normalization periodically performed on the learnt - * rate behaviour values (lower means we should trust more what we learnt - * about behaviour of rates, higher means we should trust more the natural - * ordering of rates) - */ -struct rc_pid_debugfs_entries { - struct dentry *target; - struct dentry *sampling_period; - struct dentry *coeff_p; - struct dentry *coeff_i; - struct dentry *coeff_d; - struct dentry *smoothing_shift; - struct dentry *sharpen_factor; - struct dentry *sharpen_duration; - struct dentry *norm_offset; -}; - -void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, - struct ieee80211_tx_info *stat); - -void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf, - int index, int rate); - -void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf, - int index, int rate); - -void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf, - s32 pf_sample, s32 prop_err, - s32 int_err, s32 der_err); - -void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta, - struct dentry *dir); - -void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta); - -struct rc_pid_sta_info { - unsigned long last_change; - unsigned long last_sample; - - u32 tx_num_failed; - u32 tx_num_xmit; - - int txrate_idx; - - /* Average failed frames percentage error (i.e. actual vs. target - * percentage), scaled by RC_PID_SMOOTHING. This value is computed - * using using an exponential weighted average technique: - * - * (RC_PID_SMOOTHING - 1) * err_avg_old + err - * err_avg = ------------------------------------------ - * RC_PID_SMOOTHING - * - * where err_avg is the new approximation, err_avg_old the previous one - * and err is the error w.r.t. to the current failed frames percentage - * sample. Note that the bigger RC_PID_SMOOTHING the more weight is - * given to the previous estimate, resulting in smoother behavior (i.e. - * corresponding to a longer integration window). - * - * For computation, we actually don't use the above formula, but this - * one: - * - * err_avg_scaled = err_avg_old_scaled - err_avg_old + err - * - * where: - * err_avg_scaled = err * RC_PID_SMOOTHING - * err_avg_old_scaled = err_avg_old * RC_PID_SMOOTHING - * - * This avoids floating point numbers and the per_failed_old value can - * easily be obtained by shifting per_failed_old_scaled right by - * RC_PID_SMOOTHING_SHIFT. - */ - s32 err_avg_sc; - - /* Last framed failes percentage sample. */ - u32 last_pf; - - /* Sharpening needed. */ - u8 sharp_cnt; - -#ifdef CONFIG_MAC80211_DEBUGFS - /* Event buffer */ - struct rc_pid_event_buffer events; - - /* Events debugfs file entry */ - struct dentry *events_entry; -#endif -}; - -/* Algorithm parameters. We keep them on a per-algorithm approach, so they can - * be tuned individually for each interface. - */ -struct rc_pid_rateinfo { - - /* Map sorted rates to rates in ieee80211_hw_mode. */ - int index; - - /* Map rates in ieee80211_hw_mode to sorted rates. */ - int rev_index; - - /* Did we do any measurement on this rate? */ - bool valid; - - /* Comparison with the lowest rate. */ - int diff; -}; - -struct rc_pid_info { - - /* The failed frames percentage target. */ - unsigned int target; - - /* Rate at which failed frames percentage is sampled in 0.001s. */ - unsigned int sampling_period; - - /* P, I and D coefficients. */ - int coeff_p; - int coeff_i; - int coeff_d; - - /* Exponential averaging shift. */ - unsigned int smoothing_shift; - - /* Sharpening factor and duration. */ - unsigned int sharpen_factor; - unsigned int sharpen_duration; - - /* Normalization offset. */ - unsigned int norm_offset; - - /* Rates information. */ - struct rc_pid_rateinfo *rinfo; - - /* Index of the last used rate. */ - int oldrate; - -#ifdef CONFIG_MAC80211_DEBUGFS - /* Debugfs entries created for the parameters above. */ - struct rc_pid_debugfs_entries dentries; -#endif -}; - -#endif /* RC80211_PID_H */ diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c deleted file mode 100644 index d0da2a70fe68..000000000000 --- a/net/mac80211/rc80211_pid_algo.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright 2002-2005, Instant802 Networks, Inc. - * Copyright 2005, Devicescape Software, Inc. - * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de> - * Copyright 2007-2008, Stefano Brivio <stefano.brivio@polimi.it> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/netdevice.h> -#include <linux/types.h> -#include <linux/skbuff.h> -#include <linux/debugfs.h> -#include <linux/slab.h> -#include <net/mac80211.h> -#include "rate.h" -#include "mesh.h" -#include "rc80211_pid.h" - - -/* This is an implementation of a TX rate control algorithm that uses a PID - * controller. Given a target failed frames rate, the controller decides about - * TX rate changes to meet the target failed frames rate. - * - * The controller basically computes the following: - * - * adj = CP * err + CI * err_avg + CD * (err - last_err) * (1 + sharpening) - * - * where - * adj adjustment value that is used to switch TX rate (see below) - * err current error: target vs. current failed frames percentage - * last_err last error - * err_avg average (i.e. poor man's integral) of recent errors - * sharpening non-zero when fast response is needed (i.e. right after - * association or no frames sent for a long time), heading - * to zero over time - * CP Proportional coefficient - * CI Integral coefficient - * CD Derivative coefficient - * - * CP, CI, CD are subject to careful tuning. - * - * The integral component uses a exponential moving average approach instead of - * an actual sliding window. The advantage is that we don't need to keep an - * array of the last N error values and computation is easier. - * - * Once we have the adj value, we map it to a rate by means of a learning - * algorithm. This algorithm keeps the state of the percentual failed frames - * difference between rates. The behaviour of the lowest available rate is kept - * as a reference value, and every time we switch between two rates, we compute - * the difference between the failed frames each rate exhibited. By doing so, - * we compare behaviours which different rates exhibited in adjacent timeslices, - * thus the comparison is minimally affected by external conditions. This - * difference gets propagated to the whole set of measurements, so that the - * reference is always the same. Periodically, we normalize this set so that - * recent events weigh the most. By comparing the adj value with this set, we - * avoid pejorative switches to lower rates and allow for switches to higher - * rates if they behaved well. - * - * Note that for the computations we use a fixed-point representation to avoid - * floating point arithmetic. Hence, all values are shifted left by - * RC_PID_ARITH_SHIFT. - */ - - -/* Adjust the rate while ensuring that we won't switch to a lower rate if it - * exhibited a worse failed frames behaviour and we'll choose the highest rate - * whose failed frames behaviour is not worse than the one of the original rate - * target. While at it, check that the new rate is valid. */ -static void rate_control_pid_adjust_rate(struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, - struct rc_pid_sta_info *spinfo, int adj, - struct rc_pid_rateinfo *rinfo) -{ - int cur_sorted, new_sorted, probe, tmp, n_bitrates, band; - int cur = spinfo->txrate_idx; - - band = sband->band; - n_bitrates = sband->n_bitrates; - - /* Map passed arguments to sorted values. */ - cur_sorted = rinfo[cur].rev_index; - new_sorted = cur_sorted + adj; - - /* Check limits. */ - if (new_sorted < 0) - new_sorted = rinfo[0].rev_index; - else if (new_sorted >= n_bitrates) - new_sorted = rinfo[n_bitrates - 1].rev_index; - - tmp = new_sorted; - - if (adj < 0) { - /* Ensure that the rate decrease isn't disadvantageous. */ - for (probe = cur_sorted; probe >= new_sorted; probe--) - if (rinfo[probe].diff <= rinfo[cur_sorted].diff && - rate_supported(sta, band, rinfo[probe].index)) - tmp = probe; - } else { - /* Look for rate increase with zero (or below) cost. */ - for (probe = new_sorted + 1; probe < n_bitrates; probe++) - if (rinfo[probe].diff <= rinfo[new_sorted].diff && - rate_supported(sta, band, rinfo[probe].index)) - tmp = probe; - } - - /* Fit the rate found to the nearest supported rate. */ - do { - if (rate_supported(sta, band, rinfo[tmp].index)) { - spinfo->txrate_idx = rinfo[tmp].index; - break; - } - if (adj < 0) - tmp--; - else - tmp++; - } while (tmp < n_bitrates && tmp >= 0); - -#ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_rate_change(&spinfo->events, - spinfo->txrate_idx, - sband->bitrates[spinfo->txrate_idx].bitrate); -#endif -} - -/* Normalize the failed frames per-rate differences. */ -static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l) -{ - int i, norm_offset = pinfo->norm_offset; - struct rc_pid_rateinfo *r = pinfo->rinfo; - - if (r[0].diff > norm_offset) - r[0].diff -= norm_offset; - else if (r[0].diff < -norm_offset) - r[0].diff += norm_offset; - for (i = 0; i < l - 1; i++) - if (r[i + 1].diff > r[i].diff + norm_offset) - r[i + 1].diff -= norm_offset; - else if (r[i + 1].diff <= r[i].diff) - r[i + 1].diff += norm_offset; -} - -static void rate_control_pid_sample(struct rc_pid_info *pinfo, - struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, - struct rc_pid_sta_info *spinfo) -{ - struct rc_pid_rateinfo *rinfo = pinfo->rinfo; - u32 pf; - s32 err_avg; - u32 err_prop; - u32 err_int; - u32 err_der; - int adj, i, j, tmp; - unsigned long period; - - /* In case nothing happened during the previous control interval, turn - * the sharpening factor on. */ - period = msecs_to_jiffies(pinfo->sampling_period); - if (jiffies - spinfo->last_sample > 2 * period) - spinfo->sharp_cnt = pinfo->sharpen_duration; - - spinfo->last_sample = jiffies; - - /* This should never happen, but in case, we assume the old sample is - * still a good measurement and copy it. */ - if (unlikely(spinfo->tx_num_xmit == 0)) - pf = spinfo->last_pf; - else - pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit; - - spinfo->tx_num_xmit = 0; - spinfo->tx_num_failed = 0; - - /* If we just switched rate, update the rate behaviour info. */ - if (pinfo->oldrate != spinfo->txrate_idx) { - - i = rinfo[pinfo->oldrate].rev_index; - j = rinfo[spinfo->txrate_idx].rev_index; - - tmp = (pf - spinfo->last_pf); - tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT); - - rinfo[j].diff = rinfo[i].diff + tmp; - pinfo->oldrate = spinfo->txrate_idx; - } - rate_control_pid_normalize(pinfo, sband->n_bitrates); - - /* Compute the proportional, integral and derivative errors. */ - err_prop = (pinfo->target - pf) << RC_PID_ARITH_SHIFT; - - err_avg = spinfo->err_avg_sc >> pinfo->smoothing_shift; - spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop; - err_int = spinfo->err_avg_sc >> pinfo->smoothing_shift; - - err_der = (pf - spinfo->last_pf) * - (1 + pinfo->sharpen_factor * spinfo->sharp_cnt); - spinfo->last_pf = pf; - if (spinfo->sharp_cnt) - spinfo->sharp_cnt--; - -#ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_pf_sample(&spinfo->events, pf, err_prop, err_int, - err_der); -#endif - - /* Compute the controller output. */ - adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i - + err_der * pinfo->coeff_d); - adj = RC_PID_DO_ARITH_RIGHT_SHIFT(adj, 2 * RC_PID_ARITH_SHIFT); - - /* Change rate. */ - if (adj) - rate_control_pid_adjust_rate(sband, sta, spinfo, adj, rinfo); -} - -static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb) -{ - struct rc_pid_info *pinfo = priv; - struct rc_pid_sta_info *spinfo = priv_sta; - unsigned long period; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - if (!spinfo) - return; - - /* Ignore all frames that were sent with a different rate than the rate - * we currently advise mac80211 to use. */ - if (info->status.rates[0].idx != spinfo->txrate_idx) - return; - - spinfo->tx_num_xmit++; - -#ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_tx_status(&spinfo->events, info); -#endif - - /* We count frames that totally failed to be transmitted as two bad - * frames, those that made it out but had some retries as one good and - * one bad frame. */ - if (!(info->flags & IEEE80211_TX_STAT_ACK)) { - spinfo->tx_num_failed += 2; - spinfo->tx_num_xmit++; - } else if (info->status.rates[0].count > 1) { - spinfo->tx_num_failed++; - spinfo->tx_num_xmit++; - } - - /* Update PID controller state. */ - period = msecs_to_jiffies(pinfo->sampling_period); - if (time_after(jiffies, spinfo->last_sample + period)) - rate_control_pid_sample(pinfo, sband, sta, spinfo); -} - -static void -rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta, - void *priv_sta, - struct ieee80211_tx_rate_control *txrc) -{ - struct sk_buff *skb = txrc->skb; - struct ieee80211_supported_band *sband = txrc->sband; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct rc_pid_sta_info *spinfo = priv_sta; - int rateidx; - - if (txrc->rts) - info->control.rates[0].count = - txrc->hw->conf.long_frame_max_tx_count; - else - info->control.rates[0].count = - txrc->hw->conf.short_frame_max_tx_count; - - /* Send management frames and NO_ACK data using lowest rate. */ - if (rate_control_send_low(sta, priv_sta, txrc)) - return; - - rateidx = spinfo->txrate_idx; - - if (rateidx >= sband->n_bitrates) - rateidx = sband->n_bitrates - 1; - - info->control.rates[0].idx = rateidx; - -#ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_tx_rate(&spinfo->events, - rateidx, sband->bitrates[rateidx].bitrate); -#endif -} - -static void -rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, - struct cfg80211_chan_def *chandef, - struct ieee80211_sta *sta, void *priv_sta) -{ - struct rc_pid_sta_info *spinfo = priv_sta; - struct rc_pid_info *pinfo = priv; - struct rc_pid_rateinfo *rinfo = pinfo->rinfo; - int i, j, tmp; - bool s; - - /* TODO: This routine should consider using RSSI from previous packets - * as we need to have IEEE 802.1X auth succeed immediately after assoc.. - * Until that method is implemented, we will use the lowest supported - * rate as a workaround. */ - - /* Sort the rates. This is optimized for the most common case (i.e. - * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed - * mapping too. */ - for (i = 0; i < sband->n_bitrates; i++) { - rinfo[i].index = i; - rinfo[i].rev_index = i; - if (RC_PID_FAST_START) - rinfo[i].diff = 0; - else - rinfo[i].diff = i * pinfo->norm_offset; - } - for (i = 1; i < sband->n_bitrates; i++) { - s = false; - for (j = 0; j < sband->n_bitrates - i; j++) - if (unlikely(sband->bitrates[rinfo[j].index].bitrate > - sband->bitrates[rinfo[j + 1].index].bitrate)) { - tmp = rinfo[j].index; - rinfo[j].index = rinfo[j + 1].index; - rinfo[j + 1].index = tmp; - rinfo[rinfo[j].index].rev_index = j; - rinfo[rinfo[j + 1].index].rev_index = j + 1; - s = true; - } - if (!s) - break; - } - - spinfo->txrate_idx = rate_lowest_index(sband, sta); -} - -static void *rate_control_pid_alloc(struct ieee80211_hw *hw, - struct dentry *debugfsdir) -{ - struct rc_pid_info *pinfo; - struct rc_pid_rateinfo *rinfo; - struct ieee80211_supported_band *sband; - int i, max_rates = 0; -#ifdef CONFIG_MAC80211_DEBUGFS - struct rc_pid_debugfs_entries *de; -#endif - - pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); - if (!pinfo) - return NULL; - - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { - sband = hw->wiphy->bands[i]; - if (sband && sband->n_bitrates > max_rates) - max_rates = sband->n_bitrates; - } - - rinfo = kmalloc(sizeof(*rinfo) * max_rates, GFP_ATOMIC); - if (!rinfo) { - kfree(pinfo); - return NULL; - } - - pinfo->target = RC_PID_TARGET_PF; - pinfo->sampling_period = RC_PID_INTERVAL; - pinfo->coeff_p = RC_PID_COEFF_P; - pinfo->coeff_i = RC_PID_COEFF_I; - pinfo->coeff_d = RC_PID_COEFF_D; - pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; - pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; - pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; - pinfo->norm_offset = RC_PID_NORM_OFFSET; - pinfo->rinfo = rinfo; - pinfo->oldrate = 0; - -#ifdef CONFIG_MAC80211_DEBUGFS - de = &pinfo->dentries; - de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, - debugfsdir, &pinfo->target); - de->sampling_period = debugfs_create_u32("sampling_period", - S_IRUSR | S_IWUSR, debugfsdir, - &pinfo->sampling_period); - de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR, - debugfsdir, (u32 *)&pinfo->coeff_p); - de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR, - debugfsdir, (u32 *)&pinfo->coeff_i); - de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR, - debugfsdir, (u32 *)&pinfo->coeff_d); - de->smoothing_shift = debugfs_create_u32("smoothing_shift", - S_IRUSR | S_IWUSR, debugfsdir, - &pinfo->smoothing_shift); - de->sharpen_factor = debugfs_create_u32("sharpen_factor", - S_IRUSR | S_IWUSR, debugfsdir, - &pinfo->sharpen_factor); - de->sharpen_duration = debugfs_create_u32("sharpen_duration", - S_IRUSR | S_IWUSR, debugfsdir, - &pinfo->sharpen_duration); - de->norm_offset = debugfs_create_u32("norm_offset", - S_IRUSR | S_IWUSR, debugfsdir, - &pinfo->norm_offset); -#endif - - return pinfo; -} - -static void rate_control_pid_free(void *priv) -{ - struct rc_pid_info *pinfo = priv; -#ifdef CONFIG_MAC80211_DEBUGFS - struct rc_pid_debugfs_entries *de = &pinfo->dentries; - - debugfs_remove(de->norm_offset); - debugfs_remove(de->sharpen_duration); - debugfs_remove(de->sharpen_factor); - debugfs_remove(de->smoothing_shift); - debugfs_remove(de->coeff_d); - debugfs_remove(de->coeff_i); - debugfs_remove(de->coeff_p); - debugfs_remove(de->sampling_period); - debugfs_remove(de->target); -#endif - - kfree(pinfo->rinfo); - kfree(pinfo); -} - -static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta, - gfp_t gfp) -{ - struct rc_pid_sta_info *spinfo; - - spinfo = kzalloc(sizeof(*spinfo), gfp); - if (spinfo == NULL) - return NULL; - - spinfo->last_sample = jiffies; - -#ifdef CONFIG_MAC80211_DEBUGFS - spin_lock_init(&spinfo->events.lock); - init_waitqueue_head(&spinfo->events.waitqueue); -#endif - - return spinfo; -} - -static void rate_control_pid_free_sta(void *priv, struct ieee80211_sta *sta, - void *priv_sta) -{ - kfree(priv_sta); -} - -static const struct rate_control_ops mac80211_rcpid = { - .name = "pid", - .tx_status = rate_control_pid_tx_status, - .get_rate = rate_control_pid_get_rate, - .rate_init = rate_control_pid_rate_init, - .alloc = rate_control_pid_alloc, - .free = rate_control_pid_free, - .alloc_sta = rate_control_pid_alloc_sta, - .free_sta = rate_control_pid_free_sta, -#ifdef CONFIG_MAC80211_DEBUGFS - .add_sta_debugfs = rate_control_pid_add_sta_debugfs, - .remove_sta_debugfs = rate_control_pid_remove_sta_debugfs, -#endif -}; - -int __init rc80211_pid_init(void) -{ - return ieee80211_rate_control_register(&mac80211_rcpid); -} - -void rc80211_pid_exit(void) -{ - ieee80211_rate_control_unregister(&mac80211_rcpid); -} diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c deleted file mode 100644 index 6ff134650a84..000000000000 --- a/net/mac80211/rc80211_pid_debugfs.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/poll.h> -#include <linux/netdevice.h> -#include <linux/types.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/export.h> - -#include <net/mac80211.h> -#include "rate.h" - -#include "rc80211_pid.h" - -static void rate_control_pid_event(struct rc_pid_event_buffer *buf, - enum rc_pid_event_type type, - union rc_pid_event_data *data) -{ - struct rc_pid_event *ev; - unsigned long status; - - spin_lock_irqsave(&buf->lock, status); - ev = &(buf->ring[buf->next_entry]); - buf->next_entry = (buf->next_entry + 1) % RC_PID_EVENT_RING_SIZE; - - ev->timestamp = jiffies; - ev->id = buf->ev_count++; - ev->type = type; - ev->data = *data; - - spin_unlock_irqrestore(&buf->lock, status); - - wake_up_all(&buf->waitqueue); -} - -void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, - struct ieee80211_tx_info *stat) -{ - union rc_pid_event_data evd; - - evd.flags = stat->flags; - memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_info)); - rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd); -} - -void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf, - int index, int rate) -{ - union rc_pid_event_data evd; - - evd.index = index; - evd.rate = rate; - rate_control_pid_event(buf, RC_PID_EVENT_TYPE_RATE_CHANGE, &evd); -} - -void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf, - int index, int rate) -{ - union rc_pid_event_data evd; - - evd.index = index; - evd.rate = rate; - rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_RATE, &evd); -} - -void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf, - s32 pf_sample, s32 prop_err, - s32 int_err, s32 der_err) -{ - union rc_pid_event_data evd; - - evd.pf_sample = pf_sample; - evd.prop_err = prop_err; - evd.int_err = int_err; - evd.der_err = der_err; - rate_control_pid_event(buf, RC_PID_EVENT_TYPE_PF_SAMPLE, &evd); -} - -static int rate_control_pid_events_open(struct inode *inode, struct file *file) -{ - struct rc_pid_sta_info *sinfo = inode->i_private; - struct rc_pid_event_buffer *events = &sinfo->events; - struct rc_pid_events_file_info *file_info; - unsigned long status; - - /* Allocate a state struct */ - file_info = kmalloc(sizeof(*file_info), GFP_KERNEL); - if (file_info == NULL) - return -ENOMEM; - - spin_lock_irqsave(&events->lock, status); - - file_info->next_entry = events->next_entry; - file_info->events = events; - - spin_unlock_irqrestore(&events->lock, status); - - file->private_data = file_info; - - return 0; -} - -static int rate_control_pid_events_release(struct inode *inode, - struct file *file) -{ - struct rc_pid_events_file_info *file_info = file->private_data; - - kfree(file_info); - - return 0; -} - -static unsigned int rate_control_pid_events_poll(struct file *file, - poll_table *wait) -{ - struct rc_pid_events_file_info *file_info = file->private_data; - - poll_wait(file, &file_info->events->waitqueue, wait); - - return POLLIN | POLLRDNORM; -} - -#define RC_PID_PRINT_BUF_SIZE 64 - -static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf, - size_t length, loff_t *offset) -{ - struct rc_pid_events_file_info *file_info = file->private_data; - struct rc_pid_event_buffer *events = file_info->events; - struct rc_pid_event *ev; - char pb[RC_PID_PRINT_BUF_SIZE]; - int ret; - int p; - unsigned long status; - - /* Check if there is something to read. */ - if (events->next_entry == file_info->next_entry) { - if (file->f_flags & O_NONBLOCK) - return -EAGAIN; - - /* Wait */ - ret = wait_event_interruptible(events->waitqueue, - events->next_entry != file_info->next_entry); - - if (ret) - return ret; - } - - /* Write out one event per call. I don't care whether it's a little - * inefficient, this is debugging code anyway. */ - spin_lock_irqsave(&events->lock, status); - - /* Get an event */ - ev = &(events->ring[file_info->next_entry]); - file_info->next_entry = (file_info->next_entry + 1) % - RC_PID_EVENT_RING_SIZE; - - /* Print information about the event. Note that userspace needs to - * provide large enough buffers. */ - length = length < RC_PID_PRINT_BUF_SIZE ? - length : RC_PID_PRINT_BUF_SIZE; - p = scnprintf(pb, length, "%u %lu ", ev->id, ev->timestamp); - switch (ev->type) { - case RC_PID_EVENT_TYPE_TX_STATUS: - p += scnprintf(pb + p, length - p, "tx_status %u %u", - !(ev->data.flags & IEEE80211_TX_STAT_ACK), - ev->data.tx_status.status.rates[0].idx); - break; - case RC_PID_EVENT_TYPE_RATE_CHANGE: - p += scnprintf(pb + p, length - p, "rate_change %d %d", - ev->data.index, ev->data.rate); - break; - case RC_PID_EVENT_TYPE_TX_RATE: - p += scnprintf(pb + p, length - p, "tx_rate %d %d", - ev->data.index, ev->data.rate); - break; - case RC_PID_EVENT_TYPE_PF_SAMPLE: - p += scnprintf(pb + p, length - p, - "pf_sample %d %d %d %d", - ev->data.pf_sample, ev->data.prop_err, - ev->data.int_err, ev->data.der_err); - break; - } - p += scnprintf(pb + p, length - p, "\n"); - - spin_unlock_irqrestore(&events->lock, status); - - if (copy_to_user(buf, pb, p)) - return -EFAULT; - - return p; -} - -#undef RC_PID_PRINT_BUF_SIZE - -static const struct file_operations rc_pid_fop_events = { - .owner = THIS_MODULE, - .read = rate_control_pid_events_read, - .poll = rate_control_pid_events_poll, - .open = rate_control_pid_events_open, - .release = rate_control_pid_events_release, - .llseek = noop_llseek, -}; - -void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta, - struct dentry *dir) -{ - struct rc_pid_sta_info *spinfo = priv_sta; - - spinfo->events_entry = debugfs_create_file("rc_pid_events", S_IRUGO, - dir, spinfo, - &rc_pid_fop_events); -} - -void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta) -{ - struct rc_pid_sta_info *spinfo = priv_sta; - - debugfs_remove(spinfo->events_entry); -} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 216c45b949e5..bd2c9b22c945 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -54,24 +54,25 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, return skb; } -static inline int should_drop_frame(struct sk_buff *skb, int present_fcs_len) +static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_hdr *hdr; - - hdr = (void *)(skb->data); + struct ieee80211_hdr *hdr = (void *)skb->data; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC | RX_FLAG_AMPDU_IS_ZEROLEN)) - return 1; + return true; + if (unlikely(skb->len < 16 + present_fcs_len)) - return 1; + return true; + if (ieee80211_is_ctl(hdr->frame_control) && !ieee80211_is_pspoll(hdr->frame_control) && !ieee80211_is_back_req(hdr->frame_control)) - return 1; - return 0; + return true; + + return false; } static int @@ -687,20 +688,27 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, int index, struct sk_buff_head *frames) { - struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; + struct sk_buff_head *skb_list = &tid_agg_rx->reorder_buf[index]; + struct sk_buff *skb; struct ieee80211_rx_status *status; lockdep_assert_held(&tid_agg_rx->reorder_lock); - if (!skb) + if (skb_queue_empty(skb_list)) goto no_frame; - /* release the frame from the reorder ring buffer */ + if (!ieee80211_rx_reorder_ready(skb_list)) { + __skb_queue_purge(skb_list); + goto no_frame; + } + + /* release frames from the reorder ring buffer */ tid_agg_rx->stored_mpdu_num--; - tid_agg_rx->reorder_buf[index] = NULL; - status = IEEE80211_SKB_RXCB(skb); - status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE; - __skb_queue_tail(frames, skb); + while ((skb = __skb_dequeue(skb_list))) { + status = IEEE80211_SKB_RXCB(skb); + status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE; + __skb_queue_tail(frames, skb); + } no_frame: tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); @@ -737,13 +745,13 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff_head *frames) { - int index, j; + int index, i, j; lockdep_assert_held(&tid_agg_rx->reorder_lock); /* release the buffer until next missing frame */ index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; - if (!tid_agg_rx->reorder_buf[index] && + if (!ieee80211_rx_reorder_ready(&tid_agg_rx->reorder_buf[index]) && tid_agg_rx->stored_mpdu_num) { /* * No buffers ready to be released, but check whether any @@ -752,7 +760,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, int skipped = 1; for (j = (index + 1) % tid_agg_rx->buf_size; j != index; j = (j + 1) % tid_agg_rx->buf_size) { - if (!tid_agg_rx->reorder_buf[j]) { + if (!ieee80211_rx_reorder_ready( + &tid_agg_rx->reorder_buf[j])) { skipped++; continue; } @@ -761,6 +770,11 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, HT_RX_REORDER_BUF_TIMEOUT)) goto set_release_timer; + /* don't leave incomplete A-MSDUs around */ + for (i = (index + 1) % tid_agg_rx->buf_size; i != j; + i = (i + 1) % tid_agg_rx->buf_size) + __skb_queue_purge(&tid_agg_rx->reorder_buf[i]); + ht_dbg_ratelimited(sdata, "release an RX reorder frame due to timeout on earlier frames\n"); ieee80211_release_reorder_frame(sdata, tid_agg_rx, j, @@ -774,7 +788,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, skipped) & IEEE80211_SN_MASK; skipped = 0; } - } else while (tid_agg_rx->reorder_buf[index]) { + } else while (ieee80211_rx_reorder_ready( + &tid_agg_rx->reorder_buf[index])) { ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; @@ -785,7 +800,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, for (; j != (index - 1) % tid_agg_rx->buf_size; j = (j + 1) % tid_agg_rx->buf_size) { - if (tid_agg_rx->reorder_buf[j]) + if (ieee80211_rx_reorder_ready( + &tid_agg_rx->reorder_buf[j])) break; } @@ -810,6 +826,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata struct sk_buff_head *frames) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u16 sc = le16_to_cpu(hdr->seq_ctrl); u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; u16 head_seq_num, buf_size; @@ -844,7 +861,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata index = mpdu_seq_num % tid_agg_rx->buf_size; /* check if we already stored this frame */ - if (tid_agg_rx->reorder_buf[index]) { + if (ieee80211_rx_reorder_ready(&tid_agg_rx->reorder_buf[index])) { dev_kfree_skb(skb); goto out; } @@ -857,17 +874,20 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata */ if (mpdu_seq_num == tid_agg_rx->head_seq_num && tid_agg_rx->stored_mpdu_num == 0) { - tid_agg_rx->head_seq_num = - ieee80211_sn_inc(tid_agg_rx->head_seq_num); + if (!(status->flag & RX_FLAG_AMSDU_MORE)) + tid_agg_rx->head_seq_num = + ieee80211_sn_inc(tid_agg_rx->head_seq_num); ret = false; goto out; } /* put the frame in the reordering buffer */ - tid_agg_rx->reorder_buf[index] = skb; - tid_agg_rx->reorder_time[index] = jiffies; - tid_agg_rx->stored_mpdu_num++; - ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames); + __skb_queue_tail(&tid_agg_rx->reorder_buf[index], skb); + if (!(status->flag & RX_FLAG_AMSDU_MORE)) { + tid_agg_rx->reorder_time[index] = jiffies; + tid_agg_rx->stored_mpdu_num++; + ieee80211_sta_reorder_release(sdata, tid_agg_rx, frames); + } out: spin_unlock(&tid_agg_rx->reorder_lock); @@ -1106,6 +1126,8 @@ static void sta_ps_end(struct sta_info *sta) return; } + set_sta_flag(sta, WLAN_STA_PS_DELIVER); + clear_sta_flag(sta, WLAN_STA_PS_STA); ieee80211_sta_ps_deliver_wakeup(sta); } @@ -1231,7 +1253,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { sta->last_rx = jiffies; - if (ieee80211_is_data(hdr->frame_control)) { + if (ieee80211_is_data(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { sta->last_rx_rate_idx = status->rate_idx; sta->last_rx_rate_flag = status->flag; sta->last_rx_rate_vht_flag = status->vht_flag; @@ -3125,6 +3148,14 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, if (!ieee80211_is_beacon(hdr->frame_control)) return false; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; + } else if (!ieee80211_has_tods(hdr->frame_control)) { + /* ignore data frames to TDLS-peers */ + if (ieee80211_is_data(hdr->frame_control)) + return false; + /* ignore action frames to TDLS-peers */ + if (ieee80211_is_action(hdr->frame_control) && + !ether_addr_equal(bssid, hdr->addr1)) + return false; } break; case NL80211_IFTYPE_WDS: @@ -3190,7 +3221,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, } /* - * This is the actual Rx frames handler. as it blongs to Rx path it must + * This is the actual Rx frames handler. as it belongs to Rx path it must * be called with rcu_read_lock protection. */ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 3ce7f2c8539a..a0a938145dcc 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -235,38 +235,51 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) { struct cfg80211_scan_request *req = local->scan_req; struct cfg80211_chan_def chandef; - enum ieee80211_band band; + u8 bands_used = 0; int i, ielen, n_chans; if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) return false; - do { - if (local->hw_scan_band == IEEE80211_NUM_BANDS) - return false; - - band = local->hw_scan_band; - n_chans = 0; + if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) { for (i = 0; i < req->n_channels; i++) { - if (req->channels[i]->band == band) { - local->hw_scan_req->channels[n_chans] = + local->hw_scan_req->req.channels[i] = req->channels[i]; + bands_used |= BIT(req->channels[i]->band); + } + + n_chans = req->n_channels; + } else { + do { + if (local->hw_scan_band == IEEE80211_NUM_BANDS) + return false; + + n_chans = 0; + + for (i = 0; i < req->n_channels; i++) { + if (req->channels[i]->band != + local->hw_scan_band) + continue; + local->hw_scan_req->req.channels[n_chans] = req->channels[i]; n_chans++; + bands_used |= BIT(req->channels[i]->band); } - } - local->hw_scan_band++; - } while (!n_chans); + local->hw_scan_band++; + } while (!n_chans); + } - local->hw_scan_req->n_channels = n_chans; + local->hw_scan_req->req.n_channels = n_chans; ieee80211_prepare_scan_chandef(&chandef, req->scan_width); - ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie, + ielen = ieee80211_build_preq_ies(local, + (u8 *)local->hw_scan_req->req.ie, local->hw_scan_ies_bufsize, - req->ie, req->ie_len, band, - req->rates[band], &chandef); - local->hw_scan_req->ie_len = ielen; - local->hw_scan_req->no_cck = req->no_cck; + &local->hw_scan_req->ies, + req->ie, req->ie_len, + bands_used, req->rates, &chandef); + local->hw_scan_req->req.ie_len = ielen; + local->hw_scan_req->req.no_cck = req->no_cck; return true; } @@ -291,7 +304,9 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (WARN_ON(!local->scan_req)) return; - if (hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { + if (hw_scan && !aborted && + !(local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) && + ieee80211_prep_hw_scan(local)) { int rc; rc = drv_hw_scan(local, @@ -309,7 +324,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (local->scan_req != local->int_scan_req) cfg80211_scan_done(local->scan_req, aborted); local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); local->scanning = 0; local->scan_chandef.chan = NULL; @@ -473,6 +488,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, u8 *ies; local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len; + + if (local->hw.flags & IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS) { + int i, n_bands = 0; + u8 bands_counted = 0; + + for (i = 0; i < req->n_channels; i++) { + if (bands_counted & BIT(req->channels[i]->band)) + continue; + bands_counted |= BIT(req->channels[i]->band); + n_bands++; + } + + local->hw_scan_ies_bufsize *= n_bands; + } + local->hw_scan_req = kmalloc( sizeof(*local->hw_scan_req) + req->n_channels * sizeof(req->channels[0]) + @@ -480,13 +510,13 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (!local->hw_scan_req) return -ENOMEM; - local->hw_scan_req->ssids = req->ssids; - local->hw_scan_req->n_ssids = req->n_ssids; + local->hw_scan_req->req.ssids = req->ssids; + local->hw_scan_req->req.n_ssids = req->n_ssids; ies = (u8 *)local->hw_scan_req + sizeof(*local->hw_scan_req) + req->n_channels * sizeof(req->channels[0]); - local->hw_scan_req->ie = ies; - local->hw_scan_req->flags = req->flags; + local->hw_scan_req->req.ie = ies; + local->hw_scan_req->req.flags = req->flags; local->hw_scan_band = 0; @@ -559,7 +589,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(local); local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); } return rc; @@ -773,7 +803,7 @@ void ieee80211_scan_work(struct work_struct *work) int rc; local->scan_req = NULL; - rcu_assign_pointer(local->scan_sdata, NULL); + RCU_INIT_POINTER(local->scan_sdata, NULL); rc = __ieee80211_start_scan(sdata, req); if (rc) { @@ -973,9 +1003,13 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; - struct ieee80211_sched_scan_ies sched_scan_ies = {}; + struct ieee80211_scan_ies sched_scan_ies = {}; struct cfg80211_chan_def chandef; - int ret, i, iebufsz; + int ret, i, iebufsz, num_bands = 0; + u32 rate_masks[IEEE80211_NUM_BANDS] = {}; + u8 bands_used = 0; + u8 *ie; + size_t len; iebufsz = local->scan_ies_len + req->ie_len; @@ -985,36 +1019,38 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, return -ENOTSUPP; for (i = 0; i < IEEE80211_NUM_BANDS; i++) { - if (!local->hw.wiphy->bands[i]) - continue; - - sched_scan_ies.ie[i] = kzalloc(iebufsz, GFP_KERNEL); - if (!sched_scan_ies.ie[i]) { - ret = -ENOMEM; - goto out_free; + if (local->hw.wiphy->bands[i]) { + bands_used |= BIT(i); + rate_masks[i] = (u32) -1; + num_bands++; } + } - ieee80211_prepare_scan_chandef(&chandef, req->scan_width); - - sched_scan_ies.len[i] = - ieee80211_build_preq_ies(local, sched_scan_ies.ie[i], - iebufsz, req->ie, req->ie_len, - i, (u32) -1, &chandef); + ie = kzalloc(num_bands * iebufsz, GFP_KERNEL); + if (!ie) { + ret = -ENOMEM; + goto out; } + ieee80211_prepare_scan_chandef(&chandef, req->scan_width); + + len = ieee80211_build_preq_ies(local, ie, num_bands * iebufsz, + &sched_scan_ies, req->ie, + req->ie_len, bands_used, + rate_masks, &chandef); + ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); if (ret == 0) { rcu_assign_pointer(local->sched_scan_sdata, sdata); local->sched_scan_req = req; } -out_free: - while (i > 0) - kfree(sched_scan_ies.ie[--i]); + kfree(ie); +out: if (ret) { /* Clean in case of failure after HW restart or upon resume. */ - rcu_assign_pointer(local->sched_scan_sdata, NULL); + RCU_INIT_POINTER(local->sched_scan_sdata, NULL); local->sched_scan_req = NULL; } @@ -1076,12 +1112,8 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_sched_scan_results); -void ieee80211_sched_scan_stopped_work(struct work_struct *work) +void ieee80211_sched_scan_end(struct ieee80211_local *local) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, - sched_scan_stopped_work); - mutex_lock(&local->mtx); if (!rcu_access_pointer(local->sched_scan_sdata)) { @@ -1089,7 +1121,7 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) return; } - rcu_assign_pointer(local->sched_scan_sdata, NULL); + RCU_INIT_POINTER(local->sched_scan_sdata, NULL); /* If sched scan was aborted by the driver. */ local->sched_scan_req = NULL; @@ -1099,6 +1131,15 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) cfg80211_sched_scan_stopped(local->hw.wiphy); } +void ieee80211_sched_scan_stopped_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, + sched_scan_stopped_work); + + ieee80211_sched_scan_end(local); +} + void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 137a192e64bc..c6ee2139fbc5 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -100,7 +100,8 @@ static void __cleanup_single_sta(struct sta_info *sta) struct ps_data *ps; if (test_sta_flag(sta, WLAN_STA_PS_STA) || - test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + test_sta_flag(sta, WLAN_STA_PS_DRIVER) || + test_sta_flag(sta, WLAN_STA_PS_DELIVER)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; @@ -111,6 +112,7 @@ static void __cleanup_single_sta(struct sta_info *sta) clear_sta_flag(sta, WLAN_STA_PS_STA); clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + clear_sta_flag(sta, WLAN_STA_PS_DELIVER); atomic_dec(&ps->num_sta_ps); sta_info_recalc_tim(sta); @@ -125,7 +127,7 @@ static void __cleanup_single_sta(struct sta_info *sta) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_sta_cleanup(sta); - cancel_work_sync(&sta->drv_unblock_wk); + cancel_work_sync(&sta->drv_deliver_wk); /* * Destroy aggregation state here. It would be nice to wait for the @@ -240,6 +242,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); + kfree(rcu_dereference_raw(sta->sta.rates)); kfree(sta); } @@ -252,33 +255,23 @@ static void sta_info_hash_add(struct ieee80211_local *local, rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } -static void sta_unblock(struct work_struct *wk) +static void sta_deliver_ps_frames(struct work_struct *wk) { struct sta_info *sta; - sta = container_of(wk, struct sta_info, drv_unblock_wk); + sta = container_of(wk, struct sta_info, drv_deliver_wk); if (sta->dead) return; - if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { - local_bh_disable(); + local_bh_disable(); + if (!test_sta_flag(sta, WLAN_STA_PS_STA)) ieee80211_sta_ps_deliver_wakeup(sta); - local_bh_enable(); - } else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL)) { - clear_sta_flag(sta, WLAN_STA_PS_DRIVER); - - local_bh_disable(); + else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL)) ieee80211_sta_ps_deliver_poll_response(sta); - local_bh_enable(); - } else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD)) { - clear_sta_flag(sta, WLAN_STA_PS_DRIVER); - - local_bh_disable(); + else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD)) ieee80211_sta_ps_deliver_uapsd(sta); - local_bh_enable(); - } else - clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + local_bh_enable(); } static int sta_prepare_rate_control(struct ieee80211_local *local, @@ -340,7 +333,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); - INIT_WORK(&sta->drv_unblock_wk, sta_unblock); + INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); #ifdef CONFIG_MAC80211_MESH @@ -357,7 +350,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sta_state = IEEE80211_STA_NONE; - do_posix_clock_monotonic_gettime(&uptime); + ktime_get_ts(&uptime); sta->last_connected = uptime.tv_sec; ewma_init(&sta->avg_signal, 1024, 8); for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) @@ -552,7 +545,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; - int err = 0; + int err; might_sleep(); @@ -570,7 +563,6 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) return 0; out_free: - BUG_ON(!err); sta_info_free(local, sta); return err; } @@ -1141,14 +1133,22 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) } ieee80211_add_pending_skbs(local, &pending); - clear_sta_flag(sta, WLAN_STA_PS_DRIVER); - clear_sta_flag(sta, WLAN_STA_PS_STA); + + /* now we're no longer in the deliver code */ + clear_sta_flag(sta, WLAN_STA_PS_DELIVER); + + /* The station might have polled and then woken up before we responded, + * so clear these flags now to avoid them sticking around. + */ + clear_sta_flag(sta, WLAN_STA_PSPOLL); + clear_sta_flag(sta, WLAN_STA_UAPSD); spin_unlock(&sta->ps_lock); atomic_dec(&ps->num_sta_ps); /* This station just woke up and isn't aware of our SMPS state */ - if (!ieee80211_smps_is_restrictive(sta->known_smps_mode, + if (!ieee80211_vif_is_mesh(&sdata->vif) && + !ieee80211_smps_is_restrictive(sta->known_smps_mode, sdata->smps_mode) && sta->known_smps_mode != sdata->bss->req_smps && sta_info_tx_streams(sta) != 1) { @@ -1542,10 +1542,26 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, trace_api_sta_block_awake(sta->local, pubsta, block); - if (block) + if (block) { set_sta_flag(sta, WLAN_STA_PS_DRIVER); - else if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) - ieee80211_queue_work(hw, &sta->drv_unblock_wk); + return; + } + + if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER)) + return; + + if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { + set_sta_flag(sta, WLAN_STA_PS_DELIVER); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + ieee80211_queue_work(hw, &sta->drv_deliver_wk); + } else if (test_sta_flag(sta, WLAN_STA_PSPOLL) || + test_sta_flag(sta, WLAN_STA_UAPSD)) { + /* must be asleep in this case */ + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + ieee80211_queue_work(hw, &sta->drv_deliver_wk); + } else { + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + } } EXPORT_SYMBOL(ieee80211_sta_block_awake); @@ -1703,3 +1719,140 @@ u8 sta_info_tx_streams(struct sta_info *sta) return ((ht_cap->mcs.tx_params & IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK) >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; } + +void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct rate_control_ref *ref = NULL; + struct timespec uptime; + u64 packets = 0; + u32 thr = 0; + int i, ac; + + if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) + ref = local->rate_ctrl; + + sinfo->generation = sdata->local->sta_generation; + + sinfo->filled = STATION_INFO_INACTIVE_TIME | + STATION_INFO_RX_BYTES64 | + STATION_INFO_TX_BYTES64 | + STATION_INFO_RX_PACKETS | + STATION_INFO_TX_PACKETS | + STATION_INFO_TX_RETRIES | + STATION_INFO_TX_FAILED | + STATION_INFO_TX_BITRATE | + STATION_INFO_RX_BITRATE | + STATION_INFO_RX_DROP_MISC | + STATION_INFO_BSS_PARAM | + STATION_INFO_CONNECTED_TIME | + STATION_INFO_STA_FLAGS | + STATION_INFO_BEACON_LOSS_COUNT; + + ktime_get_ts(&uptime); + sinfo->connected_time = uptime.tv_sec - sta->last_connected; + + sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); + sinfo->tx_bytes = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + sinfo->tx_bytes += sta->tx_bytes[ac]; + packets += sta->tx_packets[ac]; + } + sinfo->tx_packets = packets; + sinfo->rx_bytes = sta->rx_bytes; + sinfo->rx_packets = sta->rx_packets; + sinfo->tx_retries = sta->tx_retry_count; + sinfo->tx_failed = sta->tx_retry_failed; + sinfo->rx_dropped_misc = sta->rx_dropped; + sinfo->beacon_loss_count = sta->beacon_loss_count; + + if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || + (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { + sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; + if (!local->ops->get_rssi || + drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal)) + sinfo->signal = (s8)sta->last_signal; + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + } + if (sta->chains) { + sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | + STATION_INFO_CHAIN_SIGNAL_AVG; + + sinfo->chains = sta->chains; + for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { + sinfo->chain_signal[i] = sta->chain_signal_last[i]; + sinfo->chain_signal_avg[i] = + (s8) -ewma_read(&sta->chain_signal_avg[i]); + } + } + + sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); + sta_set_rate_info_rx(sta, &sinfo->rxrate); + + if (ieee80211_vif_is_mesh(&sdata->vif)) { +#ifdef CONFIG_MAC80211_MESH + sinfo->filled |= STATION_INFO_LLID | + STATION_INFO_PLID | + STATION_INFO_PLINK_STATE | + STATION_INFO_LOCAL_PM | + STATION_INFO_PEER_PM | + STATION_INFO_NONPEER_PM; + + sinfo->llid = sta->llid; + sinfo->plid = sta->plid; + sinfo->plink_state = sta->plink_state; + if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { + sinfo->filled |= STATION_INFO_T_OFFSET; + sinfo->t_offset = sta->t_offset; + } + sinfo->local_pm = sta->local_pm; + sinfo->peer_pm = sta->peer_pm; + sinfo->nonpeer_pm = sta->nonpeer_pm; +#endif + } + + sinfo->bss_param.flags = 0; + if (sdata->vif.bss_conf.use_cts_prot) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT; + if (sdata->vif.bss_conf.use_short_preamble) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; + if (sdata->vif.bss_conf.use_short_slot) + sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; + sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period; + sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; + + sinfo->sta_flags.set = 0; + sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_TDLS_PEER); + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); + if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE); + if (test_sta_flag(sta, WLAN_STA_WME)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME); + if (test_sta_flag(sta, WLAN_STA_MFP)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); + if (test_sta_flag(sta, WLAN_STA_AUTH)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); + + /* check if the driver has a SW RC implementation */ + if (ref && ref->ops->get_expected_throughput) + thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); + else + thr = drv_get_expected_throughput(local, &sta->sta); + + if (thr != 0) { + sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT; + sinfo->expected_throughput = thr; + } +} diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 4acc5fc402fa..d411bcc8ef08 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -47,6 +47,8 @@ * @WLAN_STA_TDLS_PEER: Station is a TDLS peer. * @WLAN_STA_TDLS_PEER_AUTH: This TDLS peer is authorized to send direct * packets. This means the link is enabled. + * @WLAN_STA_TDLS_INITIATOR: We are the initiator of the TDLS link with this + * station. * @WLAN_STA_UAPSD: Station requested unscheduled SP while driver was * keeping station in power-save mode, reply when the driver * unblocks the station. @@ -58,6 +60,8 @@ * @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid. * @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period. * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. + * @WLAN_STA_PS_DELIVER: station woke up, but we're still blocking TX + * until pending frames are delivered */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH, @@ -74,6 +78,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_PSPOLL, WLAN_STA_TDLS_PEER, WLAN_STA_TDLS_PEER_AUTH, + WLAN_STA_TDLS_INITIATOR, WLAN_STA_UAPSD, WLAN_STA_SP, WLAN_STA_4ADDR_EVENT, @@ -82,6 +87,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_TOFFSET_KNOWN, WLAN_STA_MPSP_OWNER, WLAN_STA_MPSP_RECIPIENT, + WLAN_STA_PS_DELIVER, }; #define ADDBA_RESP_INTERVAL HZ @@ -149,7 +155,8 @@ struct tid_ampdu_tx { /** * struct tid_ampdu_rx - TID aggregation information (Rx). * - * @reorder_buf: buffer to reorder incoming aggregated MPDUs + * @reorder_buf: buffer to reorder incoming aggregated MPDUs. An MPDU may be an + * A-MSDU with individually reported subframes. * @reorder_time: jiffies when skb was added * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) * @reorder_timer: releases expired frames from the reorder buffer. @@ -174,7 +181,7 @@ struct tid_ampdu_tx { struct tid_ampdu_rx { struct rcu_head rcu_head; spinlock_t reorder_lock; - struct sk_buff **reorder_buf; + struct sk_buff_head *reorder_buf; unsigned long *reorder_time; struct timer_list session_timer; struct timer_list reorder_timer; @@ -265,7 +272,7 @@ struct ieee80211_tx_latency_stat { * @last_rx_rate_vht_nss: rx status nss of last data packet * @lock: used for locking all fields that require locking, see comments * in the header file. - * @drv_unblock_wk: used for driver PS unblocking + * @drv_deliver_wk: used for delivering frames after driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly * @ps_lock: used for powersave (when mac80211 is the AP) related locking @@ -278,7 +285,6 @@ struct ieee80211_tx_latency_stat { * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA - * @wep_weak_iv_count: number of weak WEP IVs received from this station * @last_rx: time (in jiffies) when last frame was received from this STA * @last_connected: time (in seconds) when a station got connected * @num_duplicates: number of duplicate frames received from this STA @@ -303,7 +309,6 @@ struct ieee80211_tx_latency_stat { * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state * @plink_retries: Retries in establishment - * @ignore_plink_timer: ignore the peer-link timer (used internally) * @plink_state: peer link state * @plink_timeout: timeout of peer link * @plink_timer: peer link watch timer @@ -345,7 +350,7 @@ struct sta_info { void *rate_ctrl_priv; spinlock_t lock; - struct work_struct drv_unblock_wk; + struct work_struct drv_deliver_wk; u16 listen_interval; @@ -367,7 +372,6 @@ struct sta_info { /* Updated from RX path only, no locking requirements */ unsigned long rx_packets; u64 rx_bytes; - unsigned long wep_weak_iv_count; unsigned long last_rx; long last_connected; unsigned long num_duplicates; @@ -418,7 +422,6 @@ struct sta_info { u16 plid; u16 reason; u8 plink_retries; - bool ignore_plink_timer; enum nl80211_plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; @@ -628,6 +631,8 @@ void sta_set_rate_info_tx(struct sta_info *sta, struct rate_info *rinfo); void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo); +void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo); + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time); u8 sta_info_tx_streams(struct sta_info *sta); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 00ba90b02ab2..aa06dcad336e 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -314,10 +314,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, !is_multicast_ether_addr(hdr->addr1)) txflags |= IEEE80211_RADIOTAP_F_TX_FAIL; - if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || - (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) + if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) txflags |= IEEE80211_RADIOTAP_F_TX_CTS; - else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) txflags |= IEEE80211_RADIOTAP_F_TX_RTS; put_unaligned_le16(txflags, pos); @@ -474,8 +473,6 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_hdr *hdr) { - ktime_t skb_dprt; - struct timespec dprt_time; u32 msrmnt; u16 tid; u8 *qc; @@ -507,9 +504,8 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local, tx_lat = &sta->tx_lat[tid]; - ktime_get_ts(&dprt_time); /* time stamp completion time */ - skb_dprt = ktime_set(dprt_time.tv_sec, dprt_time.tv_nsec); - msrmnt = ktime_to_ms(ktime_sub(skb_dprt, skb_arv)); + /* Calculate the latency */ + msrmnt = ktime_to_ms(ktime_sub(ktime_get(), skb_arv)); if (tx_lat->max < msrmnt) /* update stats */ tx_lat->max = msrmnt; @@ -542,6 +538,23 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local, */ #define STA_LOST_PKT_THRESHOLD 50 +static void ieee80211_lost_packet(struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + /* This packet was aggregated but doesn't carry status info */ + if ((info->flags & IEEE80211_TX_CTL_AMPDU) && + !(info->flags & IEEE80211_TX_STAT_AMPDU)) + return; + + if (++sta->lost_packets < STA_LOST_PKT_THRESHOLD) + return; + + cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr, + sta->lost_packets, GFP_ATOMIC); + sta->lost_packets = 0; +} + void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; @@ -681,12 +694,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_STAT_ACK) { if (sta->lost_packets) sta->lost_packets = 0; - } else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) { - cfg80211_cqm_pktloss_notify(sta->sdata->dev, - sta->sta.addr, - sta->lost_packets, - GFP_ATOMIC); - sta->lost_packets = 0; + } else { + ieee80211_lost_packet(sta, skb); } } diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c new file mode 100644 index 000000000000..1b21050be174 --- /dev/null +++ b/net/mac80211/tdls.c @@ -0,0 +1,883 @@ +/* + * mac80211 TDLS handling code + * + * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2014, Intel Corporation + * + * This file is GPLv2 as found in COPYING. + */ + +#include <linux/ieee80211.h> +#include <linux/log2.h> +#include <net/cfg80211.h> +#include "ieee80211_i.h" +#include "driver-ops.h" + +/* give usermode some time for retries in setting up the TDLS session */ +#define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) + +void ieee80211_tdls_peer_del_work(struct work_struct *wk) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local; + + sdata = container_of(wk, struct ieee80211_sub_if_data, + u.mgd.tdls_peer_del_work.work); + local = sdata->local; + + mutex_lock(&local->mtx); + if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer)) { + tdls_dbg(sdata, "TDLS del peer %pM\n", sdata->u.mgd.tdls_peer); + sta_info_destroy_addr(sdata, sdata->u.mgd.tdls_peer); + eth_zero_addr(sdata->u.mgd.tdls_peer); + } + mutex_unlock(&local->mtx); +} + +static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) +{ + u8 *pos = (void *)skb_put(skb, 7); + + *pos++ = WLAN_EID_EXT_CAPABILITY; + *pos++ = 5; /* len */ + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; +} + +static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, + u16 status_code) +{ + struct ieee80211_local *local = sdata->local; + u16 capab; + + /* The capability will be 0 when sending a failure code */ + if (status_code != 0) + return 0; + + capab = 0; + if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ) + return capab; + + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + + return capab; +} + +static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, const u8 *peer, + bool initiator) +{ + struct ieee80211_tdls_lnkie *lnkid; + const u8 *init_addr, *rsp_addr; + + if (initiator) { + init_addr = sdata->vif.addr; + rsp_addr = peer; + } else { + init_addr = peer; + rsp_addr = sdata->vif.addr; + } + + lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); + + lnkid->ie_type = WLAN_EID_LINK_ID; + lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; + + memcpy(lnkid->bssid, sdata->u.mgd.bssid, ETH_ALEN); + memcpy(lnkid->init_sta, init_addr, ETH_ALEN); + memcpy(lnkid->resp_sta, rsp_addr, ETH_ALEN); +} + +/* translate numbering in the WMM parameter IE to the mac80211 notation */ +static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac) +{ + switch (ac) { + default: + WARN_ON_ONCE(1); + case 0: + return IEEE80211_AC_BE; + case 1: + return IEEE80211_AC_BK; + case 2: + return IEEE80211_AC_VI; + case 3: + return IEEE80211_AC_VO; + } +} + +static u8 ieee80211_wmm_aci_aifsn(int aifsn, bool acm, int aci) +{ + u8 ret; + + ret = aifsn & 0x0f; + if (acm) + ret |= 0x10; + ret |= (aci << 5) & 0x60; + return ret; +} + +static u8 ieee80211_wmm_ecw(u16 cw_min, u16 cw_max) +{ + return ((ilog2(cw_min + 1) << 0x0) & 0x0f) | + ((ilog2(cw_max + 1) << 0x4) & 0xf0); +} + +static void ieee80211_tdls_add_wmm_param_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_wmm_param_ie *wmm; + struct ieee80211_tx_queue_params *txq; + int i; + + wmm = (void *)skb_put(skb, sizeof(*wmm)); + memset(wmm, 0, sizeof(*wmm)); + + wmm->element_id = WLAN_EID_VENDOR_SPECIFIC; + wmm->len = sizeof(*wmm) - 2; + + wmm->oui[0] = 0x00; /* Microsoft OUI 00:50:F2 */ + wmm->oui[1] = 0x50; + wmm->oui[2] = 0xf2; + wmm->oui_type = 2; /* WME */ + wmm->oui_subtype = 1; /* WME param */ + wmm->version = 1; /* WME ver */ + wmm->qos_info = 0; /* U-APSD not in use */ + + /* + * Use the EDCA parameters defined for the BSS, or default if the AP + * doesn't support it, as mandated by 802.11-2012 section 10.22.4 + */ + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + txq = &sdata->tx_conf[ieee80211_ac_from_wmm(i)]; + wmm->ac[i].aci_aifsn = ieee80211_wmm_aci_aifsn(txq->aifs, + txq->acm, i); + wmm->ac[i].cw = ieee80211_wmm_ecw(txq->cw_min, txq->cw_max); + wmm->ac[i].txop_limit = cpu_to_le16(txq->txop); + } +} + +static void +ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, const u8 *peer, + u8 action_code, bool initiator, + const u8 *extra_ies, size_t extra_ies_len) +{ + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + struct ieee80211_sta_ht_cap ht_cap; + struct sta_info *sta = NULL; + size_t offset = 0, noffset; + u8 *pos; + + rcu_read_lock(); + + /* we should have the peer STA if we're already responding */ + if (action_code == WLAN_TDLS_SETUP_RESPONSE) { + sta = sta_info_get(sdata, peer); + if (WARN_ON_ONCE(!sta)) { + rcu_read_unlock(); + return; + } + } + + ieee80211_add_srates_ie(sdata, skb, false, band); + ieee80211_add_ext_srates_ie(sdata, skb, false, band); + + /* add any custom IEs that go before Extended Capabilities */ + if (extra_ies_len) { + static const u8 before_ext_cap[] = { + WLAN_EID_SUPP_RATES, + WLAN_EID_COUNTRY, + WLAN_EID_EXT_SUPP_RATES, + WLAN_EID_SUPPORTED_CHANNELS, + WLAN_EID_RSN, + }; + noffset = ieee80211_ie_split(extra_ies, extra_ies_len, + before_ext_cap, + ARRAY_SIZE(before_ext_cap), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + offset = noffset; + } + + ieee80211_tdls_add_ext_capab(skb); + + /* add the QoS element if we support it */ + if (local->hw.queues >= IEEE80211_NUM_ACS && + action_code != WLAN_PUB_ACTION_TDLS_DISCOVER_RES) + ieee80211_add_wmm_info_ie(skb_put(skb, 9), 0); /* no U-APSD */ + + /* add any custom IEs that go before HT capabilities */ + if (extra_ies_len) { + static const u8 before_ht_cap[] = { + WLAN_EID_SUPP_RATES, + WLAN_EID_COUNTRY, + WLAN_EID_EXT_SUPP_RATES, + WLAN_EID_SUPPORTED_CHANNELS, + WLAN_EID_RSN, + WLAN_EID_EXT_CAPABILITY, + WLAN_EID_QOS_CAPA, + WLAN_EID_FAST_BSS_TRANSITION, + WLAN_EID_TIMEOUT_INTERVAL, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + }; + noffset = ieee80211_ie_split(extra_ies, extra_ies_len, + before_ht_cap, + ARRAY_SIZE(before_ht_cap), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + offset = noffset; + } + + /* + * with TDLS we can switch channels, and HT-caps are not necessarily + * the same on all bands. The specification limits the setup to a + * single HT-cap, so use the current band for now. + */ + sband = local->hw.wiphy->bands[band]; + memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); + if ((action_code == WLAN_TDLS_SETUP_REQUEST || + action_code == WLAN_TDLS_SETUP_RESPONSE) && + ht_cap.ht_supported && (!sta || sta->sta.ht_cap.ht_supported)) { + if (action_code == WLAN_TDLS_SETUP_REQUEST) { + ieee80211_apply_htcap_overrides(sdata, &ht_cap); + + /* disable SMPS in TDLS initiator */ + ht_cap.cap |= (WLAN_HT_CAP_SM_PS_DISABLED + << IEEE80211_HT_CAP_SM_PS_SHIFT); + } else { + /* disable SMPS in TDLS responder */ + sta->sta.ht_cap.cap |= + (WLAN_HT_CAP_SM_PS_DISABLED + << IEEE80211_HT_CAP_SM_PS_SHIFT); + + /* the peer caps are already intersected with our own */ + memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); + } + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); + ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); + } + + rcu_read_unlock(); + + /* add any remaining IEs */ + if (extra_ies_len) { + noffset = extra_ies_len; + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + } + + ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); +} + +static void +ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, const u8 *peer, + bool initiator, const u8 *extra_ies, + size_t extra_ies_len) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + size_t offset = 0, noffset; + struct sta_info *sta, *ap_sta; + u8 *pos; + + rcu_read_lock(); + + sta = sta_info_get(sdata, peer); + ap_sta = sta_info_get(sdata, ifmgd->bssid); + if (WARN_ON_ONCE(!sta || !ap_sta)) { + rcu_read_unlock(); + return; + } + + /* add any custom IEs that go before the QoS IE */ + if (extra_ies_len) { + static const u8 before_qos[] = { + WLAN_EID_RSN, + }; + noffset = ieee80211_ie_split(extra_ies, extra_ies_len, + before_qos, + ARRAY_SIZE(before_qos), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + offset = noffset; + } + + /* add the QoS param IE if both the peer and we support it */ + if (local->hw.queues >= IEEE80211_NUM_ACS && + test_sta_flag(sta, WLAN_STA_WME)) + ieee80211_tdls_add_wmm_param_ie(sdata, skb); + + /* add any custom IEs that go before HT operation */ + if (extra_ies_len) { + static const u8 before_ht_op[] = { + WLAN_EID_RSN, + WLAN_EID_QOS_CAPA, + WLAN_EID_FAST_BSS_TRANSITION, + WLAN_EID_TIMEOUT_INTERVAL, + }; + noffset = ieee80211_ie_split(extra_ies, extra_ies_len, + before_ht_op, + ARRAY_SIZE(before_ht_op), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + offset = noffset; + } + + /* if HT support is only added in TDLS, we need an HT-operation IE */ + if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + struct ieee80211_chanctx_conf *chanctx_conf = + rcu_dereference(sdata->vif.chanctx_conf); + if (!WARN_ON(!chanctx_conf)) { + pos = skb_put(skb, 2 + + sizeof(struct ieee80211_ht_operation)); + /* send an empty HT operation IE */ + ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap, + &chanctx_conf->def, 0); + } + } + + rcu_read_unlock(); + + /* add any remaining IEs */ + if (extra_ies_len) { + noffset = extra_ies_len; + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + } + + ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); +} + +static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, const u8 *peer, + u8 action_code, u16 status_code, + bool initiator, const u8 *extra_ies, + size_t extra_ies_len) +{ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + if (status_code == 0) + ieee80211_tdls_add_setup_start_ies(sdata, skb, peer, + action_code, + initiator, + extra_ies, + extra_ies_len); + break; + case WLAN_TDLS_SETUP_CONFIRM: + if (status_code == 0) + ieee80211_tdls_add_setup_cfm_ies(sdata, skb, peer, + initiator, extra_ies, + extra_ies_len); + break; + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + if (extra_ies_len) + memcpy(skb_put(skb, extra_ies_len), extra_ies, + extra_ies_len); + if (status_code == 0 || action_code == WLAN_TDLS_TEARDOWN) + ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + break; + } + +} + +static int +ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_tdls_data *tf; + + tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); + + memcpy(tf->da, peer, ETH_ALEN); + memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); + tf->ether_type = cpu_to_be16(ETH_P_TDLS); + tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_REQUEST; + + skb_put(skb, sizeof(tf->u.setup_req)); + tf->u.setup_req.dialog_token = dialog_token; + tf->u.setup_req.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata, + status_code)); + break; + case WLAN_TDLS_SETUP_RESPONSE: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_RESPONSE; + + skb_put(skb, sizeof(tf->u.setup_resp)); + tf->u.setup_resp.status_code = cpu_to_le16(status_code); + tf->u.setup_resp.dialog_token = dialog_token; + tf->u.setup_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata, + status_code)); + break; + case WLAN_TDLS_SETUP_CONFIRM: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_CONFIRM; + + skb_put(skb, sizeof(tf->u.setup_cfm)); + tf->u.setup_cfm.status_code = cpu_to_le16(status_code); + tf->u.setup_cfm.dialog_token = dialog_token; + break; + case WLAN_TDLS_TEARDOWN: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_TEARDOWN; + + skb_put(skb, sizeof(tf->u.teardown)); + tf->u.teardown.reason_code = cpu_to_le16(status_code); + break; + case WLAN_TDLS_DISCOVERY_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; + + skb_put(skb, sizeof(tf->u.discover_req)); + tf->u.discover_req.dialog_token = dialog_token; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_mgmt *mgmt; + + mgmt = (void *)skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, peer, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + switch (action_code) { + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); + mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; + mgmt->u.action.u.tdls_discover_resp.action_code = + WLAN_PUB_ACTION_TDLS_DISCOVER_RES; + mgmt->u.action.u.tdls_discover_resp.dialog_token = + dialog_token; + mgmt->u.action.u.tdls_discover_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata, + status_code)); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, + u8 dialog_token, u16 status_code, + u32 peer_capability, bool initiator, + const u8 *extra_ies, size_t extra_ies_len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb = NULL; + bool send_direct; + struct sta_info *sta; + int ret; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + max(sizeof(struct ieee80211_mgmt), + sizeof(struct ieee80211_tdls_data)) + + 50 + /* supported rates */ + 7 + /* ext capab */ + 26 + /* max(WMM-info, WMM-param) */ + 2 + max(sizeof(struct ieee80211_ht_cap), + sizeof(struct ieee80211_ht_operation)) + + extra_ies_len + + sizeof(struct ieee80211_tdls_lnkie)); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, local->hw.extra_tx_headroom); + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer, + action_code, dialog_token, + status_code, skb); + send_direct = false; + break; + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code, + dialog_token, status_code, + skb); + send_direct = true; + break; + default: + ret = -ENOTSUPP; + break; + } + + if (ret < 0) + goto fail; + + rcu_read_lock(); + sta = sta_info_get(sdata, peer); + + /* infer the initiator if we can, to support old userspace */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + if (sta) + set_sta_flag(sta, WLAN_STA_TDLS_INITIATOR); + /* fall-through */ + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_DISCOVERY_REQUEST: + initiator = true; + break; + case WLAN_TDLS_SETUP_RESPONSE: + /* + * In some testing scenarios, we send a request and response. + * Make the last packet sent take effect for the initiator + * value. + */ + if (sta) + clear_sta_flag(sta, WLAN_STA_TDLS_INITIATOR); + /* fall-through */ + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + initiator = false; + break; + case WLAN_TDLS_TEARDOWN: + /* any value is ok */ + break; + default: + ret = -ENOTSUPP; + break; + } + + if (sta && test_sta_flag(sta, WLAN_STA_TDLS_INITIATOR)) + initiator = true; + + rcu_read_unlock(); + if (ret < 0) + goto fail; + + ieee80211_tdls_add_ies(sdata, skb, peer, action_code, status_code, + initiator, extra_ies, extra_ies_len); + if (send_direct) { + ieee80211_tx_skb(sdata, skb); + return 0; + } + + /* + * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise + * we should default to AC_VI. + */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + skb_set_queue_mapping(skb, IEEE80211_AC_BK); + skb->priority = 2; + break; + default: + skb_set_queue_mapping(skb, IEEE80211_AC_VI); + skb->priority = 5; + break; + } + + /* disable bottom halves when entering the Tx path */ + local_bh_disable(); + ret = ieee80211_subif_start_xmit(skb, dev); + local_bh_enable(); + + return ret; + +fail: + dev_kfree_skb(skb); + return ret; +} + +static int +ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, u32 peer_capability, bool initiator, + const u8 *extra_ies, size_t extra_ies_len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int ret; + + mutex_lock(&local->mtx); + + /* we don't support concurrent TDLS peer setups */ + if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer) && + !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { + ret = -EBUSY; + goto exit; + } + + /* + * make sure we have a STA representing the peer so we drop or buffer + * non-TDLS-setup frames to the peer. We can't send other packets + * during setup through the AP path. + * Allow error packets to be sent - sometimes we don't even add a STA + * before failing the setup. + */ + if (status_code == 0) { + rcu_read_lock(); + if (!sta_info_get(sdata, peer)) { + rcu_read_unlock(); + ret = -ENOLINK; + goto exit; + } + rcu_read_unlock(); + } + + ieee80211_flush_queues(local, sdata); + + ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, + dialog_token, status_code, + peer_capability, initiator, + extra_ies, extra_ies_len); + if (ret < 0) + goto exit; + + memcpy(sdata->u.mgd.tdls_peer, peer, ETH_ALEN); + ieee80211_queue_delayed_work(&sdata->local->hw, + &sdata->u.mgd.tdls_peer_del_work, + TDLS_PEER_SETUP_TIMEOUT); + +exit: + mutex_unlock(&local->mtx); + return ret; +} + +static int +ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, u32 peer_capability, + bool initiator, const u8 *extra_ies, + size_t extra_ies_len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + int ret; + + /* + * No packets can be transmitted to the peer via the AP during setup - + * the STA is set as a TDLS peer, but is not authorized. + * During teardown, we prevent direct transmissions by stopping the + * queues and flushing all direct packets. + */ + ieee80211_stop_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN); + ieee80211_flush_queues(local, sdata); + + ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, + dialog_token, status_code, + peer_capability, initiator, + extra_ies, extra_ies_len); + if (ret < 0) + sdata_err(sdata, "Failed sending TDLS teardown packet %d\n", + ret); + + /* + * Remove the STA AUTH flag to force further traffic through the AP. If + * the STA was unreachable, it was already removed. + */ + rcu_read_lock(); + sta = sta_info_get(sdata, peer); + if (sta) + clear_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); + rcu_read_unlock(); + + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN); + + return 0; +} + +int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, u32 peer_capability, + bool initiator, const u8 *extra_ies, + size_t extra_ies_len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int ret; + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + /* make sure we are in managed mode, and associated */ + if (sdata->vif.type != NL80211_IFTYPE_STATION || + !sdata->u.mgd.associated) + return -EINVAL; + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + ret = ieee80211_tdls_mgmt_setup(wiphy, dev, peer, action_code, + dialog_token, status_code, + peer_capability, initiator, + extra_ies, extra_ies_len); + break; + case WLAN_TDLS_TEARDOWN: + ret = ieee80211_tdls_mgmt_teardown(wiphy, dev, peer, + action_code, dialog_token, + status_code, + peer_capability, initiator, + extra_ies, extra_ies_len); + break; + case WLAN_TDLS_DISCOVERY_REQUEST: + /* + * Protect the discovery so we can hear the TDLS discovery + * response frame. It is transmitted directly and not buffered + * by the AP. + */ + drv_mgd_protect_tdls_discover(sdata->local, sdata); + /* fall-through */ + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + /* no special handling */ + ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, + action_code, + dialog_token, + status_code, + peer_capability, + initiator, extra_ies, + extra_ies_len); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + tdls_dbg(sdata, "TDLS mgmt action %d peer %pM status %d\n", + action_code, peer, ret); + return ret; +} + +int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, enum nl80211_tdls_operation oper) +{ + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int ret; + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EINVAL; + + switch (oper) { + case NL80211_TDLS_ENABLE_LINK: + case NL80211_TDLS_DISABLE_LINK: + break; + case NL80211_TDLS_TEARDOWN: + case NL80211_TDLS_SETUP: + case NL80211_TDLS_DISCOVERY_REQ: + /* We don't support in-driver setup/teardown/discovery */ + return -ENOTSUPP; + } + + mutex_lock(&local->mtx); + tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); + + switch (oper) { + case NL80211_TDLS_ENABLE_LINK: + rcu_read_lock(); + sta = sta_info_get(sdata, peer); + if (!sta) { + rcu_read_unlock(); + ret = -ENOLINK; + break; + } + + set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); + rcu_read_unlock(); + + WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) || + !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)); + ret = 0; + break; + case NL80211_TDLS_DISABLE_LINK: + /* + * The teardown message in ieee80211_tdls_mgmt_teardown() was + * created while the queues were stopped, so it might still be + * pending. Before flushing the queues we need to be sure the + * message is handled by the tasklet handling pending messages, + * otherwise we might start destroying the station before + * sending the teardown packet. + * Note that this only forces the tasklet to flush pendings - + * not to stop the tasklet from rescheduling itself. + */ + tasklet_kill(&local->tx_pending_tasklet); + /* flush a potentially queued teardown packet */ + ieee80211_flush_queues(local, sdata); + + ret = sta_info_destroy_addr(sdata, peer); + break; + default: + ret = -ENOTSUPP; + break; + } + + if (ret == 0 && ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { + cancel_delayed_work(&sdata->u.mgd.tdls_peer_del_work); + eth_zero_addr(sdata->u.mgd.tdls_peer); + } + + mutex_unlock(&local->mtx); + return ret; +} + +void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer, + enum nl80211_tdls_operation oper, + u16 reason_code, gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (vif->type != NL80211_IFTYPE_STATION || !vif->bss_conf.assoc) { + sdata_err(sdata, "Discarding TDLS oper %d - not STA or disconnected\n", + oper); + return; + } + + cfg80211_tdls_oper_request(sdata->dev, peer, oper, reason_code, gfp); +} +EXPORT_SYMBOL(ieee80211_tdls_oper_request); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a0b0aea76525..02ac535d1274 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -21,10 +21,10 @@ #define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ __field(bool, p2p) \ - __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") + __string(vif_name, sdata->name) #define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ __entry->p2p = sdata->vif.p2p; \ - __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name) + __assign_str(vif_name, sdata->name) #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" @@ -184,6 +184,20 @@ TRACE_EVENT(drv_return_bool, "true" : "false") ); +TRACE_EVENT(drv_return_u32, + TP_PROTO(struct ieee80211_local *local, u32 ret), + TP_ARGS(local, ret), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, ret) + ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + ), + TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret) +); + TRACE_EVENT(drv_return_u64, TP_PROTO(struct ieee80211_local *local, u64 ret), TP_ARGS(local, ret), @@ -1316,6 +1330,13 @@ DEFINE_EVENT(local_sdata_evt, drv_mgd_prepare_tx, TP_ARGS(local, sdata) ); +DEFINE_EVENT(local_sdata_evt, drv_mgd_protect_tdls_discover, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + + TP_ARGS(local, sdata) +); + DECLARE_EVENT_CLASS(local_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_chanctx *ctx), @@ -1375,6 +1396,91 @@ TRACE_EVENT(drv_change_chanctx, ) ); +#if !defined(__TRACE_VIF_ENTRY) +#define __TRACE_VIF_ENTRY +struct trace_vif_entry { + enum nl80211_iftype vif_type; + bool p2p; + char vif_name[IFNAMSIZ]; +} __packed; + +struct trace_chandef_entry { + u32 control_freq; + u32 chan_width; + u32 center_freq1; + u32 center_freq2; +} __packed; + +struct trace_switch_entry { + struct trace_vif_entry vif; + struct trace_chandef_entry old_chandef; + struct trace_chandef_entry new_chandef; +} __packed; + +#define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from +#endif + +TRACE_EVENT(drv_switch_vif_chanctx, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, enum ieee80211_chanctx_switch_mode mode), + TP_ARGS(local, vifs, n_vifs, mode), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, n_vifs) + __field(u32, mode) + __dynamic_array(u8, vifs, + sizeof(struct trace_switch_entry) * n_vifs) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->n_vifs = n_vifs; + __entry->mode = mode; + { + struct trace_switch_entry *local_vifs = + __get_dynamic_array(vifs); + int i; + + for (i = 0; i < n_vifs; i++) { + struct ieee80211_sub_if_data *sdata; + + sdata = container_of(vifs[i].vif, + struct ieee80211_sub_if_data, + vif); + + SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type); + SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p); + strncpy(local_vifs[i].vif.vif_name, + sdata->name, + sizeof(local_vifs[i].vif.vif_name)); + SWITCH_ENTRY_ASSIGN(old_chandef.control_freq, + old_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(old_chandef.chan_width, + old_ctx->def.width); + SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1, + old_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2, + old_ctx->def.center_freq2); + SWITCH_ENTRY_ASSIGN(new_chandef.control_freq, + new_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(new_chandef.chan_width, + new_ctx->def.width); + SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1, + new_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2, + new_ctx->def.center_freq2); + } + } + ), + + TP_printk( + LOCAL_PR_FMT " n_vifs:%d mode:%d", + LOCAL_PR_ARG, __entry->n_vifs, __entry->mode + ) +); + DECLARE_EVENT_CLASS(local_sdata_chanctx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1499,6 +1605,24 @@ DEFINE_EVENT(local_sdata_evt, drv_leave_ibss, TP_ARGS(local, sdata) ); +TRACE_EVENT(drv_get_expected_throughput, + TP_PROTO(struct ieee80211_sta *sta), + + TP_ARGS(sta), + + TP_STRUCT__entry( + STA_ENTRY + ), + + TP_fast_assign( + STA_ASSIGN; + ), + + TP_printk( + STA_PR_FMT, STA_PR_ARG + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 19d36d4117e0..464106c023d8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -250,7 +250,8 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_PS); + IEEE80211_QUEUE_STOP_REASON_PS, + false); ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; ieee80211_queue_work(&local->hw, &local->dynamic_ps_disable_work); @@ -414,6 +415,9 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_has_order(hdr->frame_control)) return TX_CONTINUE; + if (ieee80211_is_probe_req(hdr->frame_control)) + return TX_CONTINUE; + if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) info->hw_queue = tx->sdata->vif.cab_queue; @@ -463,16 +467,24 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) { struct sta_info *sta = tx->sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_local *local = tx->local; if (unlikely(!sta)) return TX_CONTINUE; if (unlikely((test_sta_flag(sta, WLAN_STA_PS_STA) || - test_sta_flag(sta, WLAN_STA_PS_DRIVER)) && + test_sta_flag(sta, WLAN_STA_PS_DRIVER) || + test_sta_flag(sta, WLAN_STA_PS_DELIVER)) && !(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) { int ac = skb_get_queue_mapping(tx->skb); + if (ieee80211_is_mgmt(hdr->frame_control) && + !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) { + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + return TX_CONTINUE; + } + ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n", sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) @@ -486,7 +498,8 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) * ahead and Tx the packet. */ if (!test_sta_flag(sta, WLAN_STA_PS_STA) && - !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + !test_sta_flag(sta, WLAN_STA_PS_DRIVER) && + !test_sta_flag(sta, WLAN_STA_PS_DELIVER)) { spin_unlock(&sta->ps_lock); return TX_CONTINUE; } @@ -531,19 +544,9 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED)) return TX_CONTINUE; - if (ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) { - if (tx->flags & IEEE80211_TX_UNICAST) - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; - return TX_CONTINUE; - } - if (tx->flags & IEEE80211_TX_UNICAST) return ieee80211_tx_h_unicast_ps_buf(tx); else @@ -1618,12 +1621,12 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_channel *chan; struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr; struct ieee80211_sub_if_data *tmp_sdata, *sdata; + struct cfg80211_chan_def *chandef; u16 len_rthdr; int hdrlen; @@ -1721,9 +1724,9 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, } if (chanctx_conf) - chan = chanctx_conf->def.chan; + chandef = &chanctx_conf->def; else if (!local->use_chanctx) - chan = local->_oper_chandef.chan; + chandef = &local->_oper_chandef; else goto fail_rcu; @@ -1743,10 +1746,11 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, * radar detection by itself. We can do that later by adding a * monitor flag interfaces used for AP support. */ - if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR))) + if (!cfg80211_reg_can_beacon(local->hw.wiphy, chandef, + sdata->vif.type)) goto fail_rcu; - ieee80211_xmit(sdata, skb, chan->band); + ieee80211_xmit(sdata, skb, chandef->chan->band); rcu_read_unlock(); return NETDEV_TX_OK; @@ -1767,15 +1771,12 @@ fail: static void ieee80211_tx_latency_start_msrmnt(struct ieee80211_local *local, struct sk_buff *skb) { - struct timespec skb_arv; struct ieee80211_tx_latency_bin_ranges *tx_latency; tx_latency = rcu_dereference(local->tx_latency); if (!tx_latency) return; - - ktime_get_ts(&skb_arv); - skb->tstamp = ktime_set(skb_arv.tv_sec, skb_arv.tv_nsec); + skb->tstamp = ktime_get(); } /** @@ -1810,7 +1811,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, int nh_pos, h_pos; struct sta_info *sta = NULL; bool wme_sta = false, authorized = false, tdls_auth = false; - bool tdls_direct = false; + bool tdls_peer = false, tdls_setup_frame = false; bool multicast; u32 info_flags = 0; u16 info_id = 0; @@ -1952,34 +1953,35 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, #endif case NL80211_IFTYPE_STATION: if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { - bool tdls_peer = false; - sta = sta_info_get(sdata, skb->data); if (sta) { authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); wme_sta = test_sta_flag(sta, WLAN_STA_WME); tdls_peer = test_sta_flag(sta, - WLAN_STA_TDLS_PEER); + WLAN_STA_TDLS_PEER); tdls_auth = test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); } - /* - * If the TDLS link is enabled, send everything - * directly. Otherwise, allow TDLS setup frames - * to be transmitted indirectly. - */ - tdls_direct = tdls_peer && (tdls_auth || - !(ethertype == ETH_P_TDLS && skb->len > 14 && - skb->data[14] == WLAN_TDLS_SNAP_RFTYPE)); + if (tdls_peer) + tdls_setup_frame = + ethertype == ETH_P_TDLS && + skb->len > 14 && + skb->data[14] == WLAN_TDLS_SNAP_RFTYPE; } - if (tdls_direct) { - /* link during setup - throw out frames to peer */ - if (!tdls_auth) - goto fail_rcu; + /* + * TDLS link during setup - throw out frames to peer. We allow + * TDLS-setup frames to unauthorized peers for the special case + * of a link teardown after a TDLS sta is removed due to being + * unreachable. + */ + if (tdls_peer && !tdls_auth && !tdls_setup_frame) + goto fail_rcu; + /* send direct packets to authorized TDLS peers */ + if (tdls_peer && tdls_auth) { /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); @@ -2328,7 +2330,8 @@ void ieee80211_tx_pending(unsigned long data) /* functions for drivers to get certain frames */ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, struct sk_buff *skb) + struct ps_data *ps, struct sk_buff *skb, + bool is_template) { u8 *pos, *tim; int aid0 = 0; @@ -2341,11 +2344,12 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, * checking byte-for-byte */ have_bits = !bitmap_empty((unsigned long *)ps->tim, IEEE80211_MAX_AID+1); - - if (ps->dtim_count == 0) - ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1; - else - ps->dtim_count--; + if (!is_template) { + if (ps->dtim_count == 0) + ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1; + else + ps->dtim_count--; + } tim = pos = (u8 *) skb_put(skb, 6); *pos++ = WLAN_EID_TIM; @@ -2391,7 +2395,8 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, } static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, - struct ps_data *ps, struct sk_buff *skb) + struct ps_data *ps, struct sk_buff *skb, + bool is_template) { struct ieee80211_local *local = sdata->local; @@ -2403,24 +2408,24 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, * of the tim bitmap in mac80211 and the driver. */ if (local->tim_in_locked_section) { - __ieee80211_beacon_add_tim(sdata, ps, skb); + __ieee80211_beacon_add_tim(sdata, ps, skb, is_template); } else { spin_lock_bh(&local->tim_lock); - __ieee80211_beacon_add_tim(sdata, ps, skb); + __ieee80211_beacon_add_tim(sdata, ps, skb, is_template); spin_unlock_bh(&local->tim_lock); } return 0; } -static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, - struct beacon_data *beacon) +static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon) { struct probe_resp *resp; - int counter_offset_beacon = sdata->csa_counter_offset_beacon; - int counter_offset_presp = sdata->csa_counter_offset_presp; u8 *beacon_data; size_t beacon_data_len; + int i; + u8 count = beacon->csa_current_counter; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: @@ -2438,32 +2443,56 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata, default: return; } - if (WARN_ON(counter_offset_beacon >= beacon_data_len)) - return; - - /* Warn if the driver did not check for/react to csa - * completeness. A beacon with CSA counter set to 0 should - * never occur, because a counter of 1 means switch just - * before the next beacon. - */ - if (WARN_ON(beacon_data[counter_offset_beacon] == 1)) - return; - beacon_data[counter_offset_beacon]--; - - if (sdata->vif.type == NL80211_IFTYPE_AP && counter_offset_presp) { - rcu_read_lock(); + rcu_read_lock(); + for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; ++i) { resp = rcu_dereference(sdata->u.ap.probe_resp); - /* if nl80211 accepted the offset, this should not happen. */ - if (WARN_ON(!resp)) { - rcu_read_unlock(); - return; + if (beacon->csa_counter_offsets[i]) { + if (WARN_ON_ONCE(beacon->csa_counter_offsets[i] >= + beacon_data_len)) { + rcu_read_unlock(); + return; + } + + beacon_data[beacon->csa_counter_offsets[i]] = count; } - resp->data[counter_offset_presp]--; - rcu_read_unlock(); + + if (sdata->vif.type == NL80211_IFTYPE_AP && resp) + resp->data[resp->csa_counter_offsets[i]] = count; } + rcu_read_unlock(); +} + +u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct beacon_data *beacon = NULL; + u8 count = 0; + + rcu_read_lock(); + + if (sdata->vif.type == NL80211_IFTYPE_AP) + beacon = rcu_dereference(sdata->u.ap.beacon); + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + beacon = rcu_dereference(sdata->u.ibss.presp); + else if (ieee80211_vif_is_mesh(&sdata->vif)) + beacon = rcu_dereference(sdata->u.mesh.beacon); + + if (!beacon) + goto unlock; + + beacon->csa_current_counter--; + + /* the counter should never reach 0 */ + WARN_ON_ONCE(!beacon->csa_current_counter); + count = beacon->csa_current_counter; + +unlock: + rcu_read_unlock(); + return count; } +EXPORT_SYMBOL(ieee80211_csa_update_counter); bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) { @@ -2471,7 +2500,6 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) struct beacon_data *beacon = NULL; u8 *beacon_data; size_t beacon_data_len; - int counter_beacon = sdata->csa_counter_offset_beacon; int ret = false; if (!ieee80211_sdata_running(sdata)) @@ -2509,10 +2537,13 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) goto out; } - if (WARN_ON(counter_beacon > beacon_data_len)) + if (!beacon->csa_counter_offsets[0]) + goto out; + + if (WARN_ON_ONCE(beacon->csa_counter_offsets[0] > beacon_data_len)) goto out; - if (beacon_data[counter_beacon] == 1) + if (beacon_data[beacon->csa_counter_offsets[0]] == 1) ret = true; out: rcu_read_unlock(); @@ -2521,17 +2552,21 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_is_complete); -struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - u16 *tim_offset, u16 *tim_length) +static struct sk_buff * +__ieee80211_beacon_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs, + bool is_template) { struct ieee80211_local *local = hw_to_local(hw); + struct beacon_data *beacon = NULL; struct sk_buff *skb = NULL; struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata = NULL; enum ieee80211_band band; struct ieee80211_tx_rate_control txrc; struct ieee80211_chanctx_conf *chanctx_conf; + int csa_off_base = 0; rcu_read_lock(); @@ -2541,18 +2576,20 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!ieee80211_sdata_running(sdata) || !chanctx_conf) goto out; - if (tim_offset) - *tim_offset = 0; - if (tim_length) - *tim_length = 0; + if (offs) + memset(offs, 0, sizeof(*offs)); if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_if_ap *ap = &sdata->u.ap; - struct beacon_data *beacon = rcu_dereference(ap->beacon); + beacon = rcu_dereference(ap->beacon); if (beacon) { - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, beacon); + if (beacon->csa_counter_offsets[0]) { + if (!is_template) + ieee80211_csa_update_counter(vif); + + ieee80211_set_csa(sdata, beacon); + } /* * headroom, head length, @@ -2569,12 +2606,16 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(skb_put(skb, beacon->head_len), beacon->head, beacon->head_len); - ieee80211_beacon_add_tim(sdata, &ap->ps, skb); + ieee80211_beacon_add_tim(sdata, &ap->ps, skb, + is_template); - if (tim_offset) - *tim_offset = beacon->head_len; - if (tim_length) - *tim_length = skb->len - beacon->head_len; + if (offs) { + offs->tim_offset = beacon->head_len; + offs->tim_length = skb->len - beacon->head_len; + + /* for AP the csa offsets are from tail */ + csa_off_base = skb->len; + } if (beacon->tail) memcpy(skb_put(skb, beacon->tail_len), @@ -2584,55 +2625,89 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_hdr *hdr; - struct beacon_data *presp = rcu_dereference(ifibss->presp); - if (!presp) + beacon = rcu_dereference(ifibss->presp); + if (!beacon) goto out; - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, presp); + if (beacon->csa_counter_offsets[0]) { + if (!is_template) + ieee80211_csa_update_counter(vif); + ieee80211_set_csa(sdata, beacon); + } - skb = dev_alloc_skb(local->tx_headroom + presp->head_len + + skb = dev_alloc_skb(local->tx_headroom + beacon->head_len + local->hw.extra_beacon_tailroom); if (!skb) goto out; skb_reserve(skb, local->tx_headroom); - memcpy(skb_put(skb, presp->head_len), presp->head, - presp->head_len); + memcpy(skb_put(skb, beacon->head_len), beacon->head, + beacon->head_len); hdr = (struct ieee80211_hdr *) skb->data; hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); } else if (ieee80211_vif_is_mesh(&sdata->vif)) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct beacon_data *bcn = rcu_dereference(ifmsh->beacon); - if (!bcn) + beacon = rcu_dereference(ifmsh->beacon); + if (!beacon) goto out; - if (sdata->vif.csa_active) - ieee80211_update_csa(sdata, bcn); + if (beacon->csa_counter_offsets[0]) { + if (!is_template) + /* TODO: For mesh csa_counter is in TU, so + * decrementing it by one isn't correct, but + * for now we leave it consistent with overall + * mac80211's behavior. + */ + ieee80211_csa_update_counter(vif); + + ieee80211_set_csa(sdata, beacon); + } if (ifmsh->sync_ops) - ifmsh->sync_ops->adjust_tbtt(sdata, bcn); + ifmsh->sync_ops->adjust_tbtt(sdata, beacon); skb = dev_alloc_skb(local->tx_headroom + - bcn->head_len + + beacon->head_len + 256 + /* TIM IE */ - bcn->tail_len + + beacon->tail_len + local->hw.extra_beacon_tailroom); if (!skb) goto out; skb_reserve(skb, local->tx_headroom); - memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len); - ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb); - memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len); + memcpy(skb_put(skb, beacon->head_len), beacon->head, + beacon->head_len); + ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template); + + if (offs) { + offs->tim_offset = beacon->head_len; + offs->tim_length = skb->len - beacon->head_len; + } + + memcpy(skb_put(skb, beacon->tail_len), beacon->tail, + beacon->tail_len); } else { WARN_ON(1); goto out; } + /* CSA offsets */ + if (offs && beacon) { + int i; + + for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) { + u16 csa_off = beacon->csa_counter_offsets[i]; + + if (!csa_off) + continue; + + offs->csa_counter_offs[i] = csa_off_base + csa_off; + } + } + band = chanctx_conf->def.chan->band; info = IEEE80211_SKB_CB(skb); @@ -2663,6 +2738,32 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, out: rcu_read_unlock(); return skb; + +} + +struct sk_buff * +ieee80211_beacon_get_template(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_mutable_offsets *offs) +{ + return __ieee80211_beacon_get(hw, vif, offs, true); +} +EXPORT_SYMBOL(ieee80211_beacon_get_template); + +struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 *tim_offset, u16 *tim_length) +{ + struct ieee80211_mutable_offsets offs = {}; + struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false); + + if (tim_offset) + *tim_offset = offs.tim_offset; + + if (tim_length) + *tim_length = offs.tim_length; + + return bcn; } EXPORT_SYMBOL(ieee80211_beacon_get_tim); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 275c94f995f7..725af7a468d2 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -317,7 +317,8 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue) } static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason) + enum queue_stop_reason reason, + bool refcounted) { struct ieee80211_local *local = hw_to_local(hw); @@ -329,7 +330,13 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, if (!test_bit(reason, &local->queue_stop_reasons[queue])) return; - __clear_bit(reason, &local->queue_stop_reasons[queue]); + if (!refcounted) + local->q_stop_reasons[queue][reason] = 0; + else + local->q_stop_reasons[queue][reason]--; + + if (local->q_stop_reasons[queue][reason] == 0) + __clear_bit(reason, &local->queue_stop_reasons[queue]); if (local->queue_stop_reasons[queue] != 0) /* someone still has this queue stopped */ @@ -344,25 +351,28 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, } void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason) + enum queue_stop_reason reason, + bool refcounted) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - __ieee80211_wake_queue(hw, queue, reason); + __ieee80211_wake_queue(hw, queue, reason, refcounted); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) { ieee80211_wake_queue_by_reason(hw, queue, - IEEE80211_QUEUE_STOP_REASON_DRIVER); + IEEE80211_QUEUE_STOP_REASON_DRIVER, + false); } EXPORT_SYMBOL(ieee80211_wake_queue); static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason) + enum queue_stop_reason reason, + bool refcounted) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; @@ -373,10 +383,13 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, if (WARN_ON(queue >= hw->queues)) return; - if (test_bit(reason, &local->queue_stop_reasons[queue])) - return; + if (!refcounted) + local->q_stop_reasons[queue][reason] = 1; + else + local->q_stop_reasons[queue][reason]++; - __set_bit(reason, &local->queue_stop_reasons[queue]); + if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue])) + return; if (local->hw.queues < IEEE80211_NUM_ACS) n_acs = 1; @@ -398,20 +411,22 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, } void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason) + enum queue_stop_reason reason, + bool refcounted) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - __ieee80211_stop_queue(hw, queue, reason); + __ieee80211_stop_queue(hw, queue, reason, refcounted); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) { ieee80211_stop_queue_by_reason(hw, queue, - IEEE80211_QUEUE_STOP_REASON_DRIVER); + IEEE80211_QUEUE_STOP_REASON_DRIVER, + false); } EXPORT_SYMBOL(ieee80211_stop_queue); @@ -429,9 +444,11 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, } spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - __ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); + __ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + false); __skb_queue_tail(&local->pending[queue], skb); - __ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); + __ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + false); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } @@ -455,20 +472,23 @@ void ieee80211_add_pending_skbs(struct ieee80211_local *local, queue = info->hw_queue; __ieee80211_stop_queue(hw, queue, - IEEE80211_QUEUE_STOP_REASON_SKB_ADD); + IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + false); __skb_queue_tail(&local->pending[queue], skb); } for (i = 0; i < hw->queues; i++) __ieee80211_wake_queue(hw, i, - IEEE80211_QUEUE_STOP_REASON_SKB_ADD); + IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + false); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, unsigned long queues, - enum queue_stop_reason reason) + enum queue_stop_reason reason, + bool refcounted) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -477,7 +497,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for_each_set_bit(i, &queues, hw->queues) - __ieee80211_stop_queue(hw, i, reason); + __ieee80211_stop_queue(hw, i, reason, refcounted); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } @@ -485,7 +505,8 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_stop_queues(struct ieee80211_hw *hw) { ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_DRIVER); + IEEE80211_QUEUE_STOP_REASON_DRIVER, + false); } EXPORT_SYMBOL(ieee80211_stop_queues); @@ -508,7 +529,8 @@ EXPORT_SYMBOL(ieee80211_queue_stopped); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, unsigned long queues, - enum queue_stop_reason reason) + enum queue_stop_reason reason, + bool refcounted) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -517,7 +539,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for_each_set_bit(i, &queues, hw->queues) - __ieee80211_wake_queue(hw, i, reason); + __ieee80211_wake_queue(hw, i, reason, refcounted); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } @@ -525,17 +547,16 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, void ieee80211_wake_queues(struct ieee80211_hw *hw) { ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_DRIVER); + IEEE80211_QUEUE_STOP_REASON_DRIVER, + false); } EXPORT_SYMBOL(ieee80211_wake_queues); -void ieee80211_flush_queues(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) +static unsigned int +ieee80211_get_vif_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { - u32 queues; - - if (!local->ops->flush) - return; + unsigned int queues; if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { int ac; @@ -551,13 +572,46 @@ void ieee80211_flush_queues(struct ieee80211_local *local, queues = BIT(local->hw.queues) - 1; } - ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_FLUSH); + return queues; +} + +void ieee80211_flush_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + unsigned int queues; + + if (!local->ops->flush) + return; + + queues = ieee80211_get_vif_queues(local, sdata); - drv_flush(local, queues, false); + ieee80211_stop_queues_by_reason(&local->hw, queues, + IEEE80211_QUEUE_STOP_REASON_FLUSH, + false); - ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_FLUSH); + drv_flush(local, sdata, queues, false); + + ieee80211_wake_queues_by_reason(&local->hw, queues, + IEEE80211_QUEUE_STOP_REASON_FLUSH, + false); +} + +void ieee80211_stop_vif_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum queue_stop_reason reason) +{ + ieee80211_stop_queues_by_reason(&local->hw, + ieee80211_get_vif_queues(local, sdata), + reason, true); +} + +void ieee80211_wake_vif_queues(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum queue_stop_reason reason) +{ + ieee80211_wake_queues_by_reason(&local->hw, + ieee80211_get_vif_queues(local, sdata), + reason, true); } static void __iterate_active_interfaces(struct ieee80211_local *local, @@ -1096,11 +1150,12 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, int err; /* 24 + 6 = header + auth_algo + auth_transaction + status_code */ - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 6 + extra_len); + skb = dev_alloc_skb(local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN + + 24 + 6 + extra_len + IEEE80211_WEP_ICV_LEN); if (!skb) return; - skb_reserve(skb, local->hw.extra_tx_headroom); + skb_reserve(skb, local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); memset(mgmt, 0, 24 + 6); @@ -1165,14 +1220,17 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, } } -int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, - size_t buffer_len, const u8 *ie, size_t ie_len, - enum ieee80211_band band, u32 rate_mask, - struct cfg80211_chan_def *chandef) +static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, + u8 *buffer, size_t buffer_len, + const u8 *ie, size_t ie_len, + enum ieee80211_band band, + u32 rate_mask, + struct cfg80211_chan_def *chandef, + size_t *offset) { struct ieee80211_supported_band *sband; u8 *pos = buffer, *end = buffer + buffer_len; - size_t offset = 0, noffset; + size_t noffset; int supp_rates_len, i; u8 rates[32]; int num_rates; @@ -1180,6 +1238,8 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, int shift; u32 rate_flags; + *offset = 0; + sband = local->hw.wiphy->bands[band]; if (WARN_ON_ONCE(!sband)) return 0; @@ -1218,12 +1278,12 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, noffset = ieee80211_ie_split(ie, ie_len, before_extrates, ARRAY_SIZE(before_extrates), - offset); - if (end - pos < noffset - offset) + *offset); + if (end - pos < noffset - *offset) goto out_err; - memcpy(pos, ie + offset, noffset - offset); - pos += noffset - offset; - offset = noffset; + memcpy(pos, ie + *offset, noffset - *offset); + pos += noffset - *offset; + *offset = noffset; } ext_rates_len = num_rates - supp_rates_len; @@ -1257,12 +1317,12 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, }; noffset = ieee80211_ie_split(ie, ie_len, before_ht, ARRAY_SIZE(before_ht), - offset); - if (end - pos < noffset - offset) + *offset); + if (end - pos < noffset - *offset) goto out_err; - memcpy(pos, ie + offset, noffset - offset); - pos += noffset - offset; - offset = noffset; + memcpy(pos, ie + *offset, noffset - *offset); + pos += noffset - *offset; + *offset = noffset; } if (sband->ht_cap.ht_supported) { @@ -1297,12 +1357,12 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, }; noffset = ieee80211_ie_split(ie, ie_len, before_vht, ARRAY_SIZE(before_vht), - offset); - if (end - pos < noffset - offset) + *offset); + if (end - pos < noffset - *offset) goto out_err; - memcpy(pos, ie + offset, noffset - offset); - pos += noffset - offset; - offset = noffset; + memcpy(pos, ie + *offset, noffset - *offset); + pos += noffset - *offset; + *offset = noffset; } if (sband->vht_cap.vht_supported) { @@ -1312,21 +1372,54 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, sband->vht_cap.cap); } - /* add any remaining custom IEs */ - if (ie && ie_len) { - noffset = ie_len; - if (end - pos < noffset - offset) - goto out_err; - memcpy(pos, ie + offset, noffset - offset); - pos += noffset - offset; - } - return pos - buffer; out_err: WARN_ONCE(1, "not enough space for preq IEs\n"); return pos - buffer; } +int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, + size_t buffer_len, + struct ieee80211_scan_ies *ie_desc, + const u8 *ie, size_t ie_len, + u8 bands_used, u32 *rate_masks, + struct cfg80211_chan_def *chandef) +{ + size_t pos = 0, old_pos = 0, custom_ie_offset = 0; + int i; + + memset(ie_desc, 0, sizeof(*ie_desc)); + + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + if (bands_used & BIT(i)) { + pos += ieee80211_build_preq_ies_band(local, + buffer + pos, + buffer_len - pos, + ie, ie_len, i, + rate_masks[i], + chandef, + &custom_ie_offset); + ie_desc->ies[i] = buffer + old_pos; + ie_desc->len[i] = pos - old_pos; + old_pos = pos; + } + } + + /* add any remaining custom IEs */ + if (ie && ie_len) { + if (WARN_ONCE(buffer_len - pos < ie_len - custom_ie_offset, + "not enough space for preq custom IEs\n")) + return pos; + memcpy(buffer + pos, ie + custom_ie_offset, + ie_len - custom_ie_offset); + ie_desc->common_ies = buffer + pos; + ie_desc->common_ie_len = ie_len - custom_ie_offset; + pos += ie_len - custom_ie_offset; + } + + return pos; +}; + struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u32 ratemask, struct ieee80211_channel *chan, @@ -1339,6 +1432,8 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; int ies_len; + u32 rate_masks[IEEE80211_NUM_BANDS] = {}; + struct ieee80211_scan_ies dummy_ie_desc; /* * Do not send DS Channel parameter for directed probe requests @@ -1356,10 +1451,11 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, if (!skb) return NULL; + rate_masks[chan->band] = ratemask; ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb), - skb_tailroom(skb), - ie, ie_len, chan->band, - ratemask, &chandef); + skb_tailroom(skb), &dummy_ie_desc, + ie, ie_len, BIT(chan->band), + rate_masks, &chandef); skb_put(skb, ies_len); if (dst) { @@ -1457,6 +1553,44 @@ void ieee80211_stop_device(struct ieee80211_local *local) drv_stop(local); } +static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_chanctx *ctx; + + /* + * We get here if during resume the device can't be restarted properly. + * We might also get here if this happens during HW reset, which is a + * slightly different situation and we need to drop all connections in + * the latter case. + * + * Ask cfg80211 to turn off all interfaces, this will result in more + * warnings but at least we'll then get into a clean stopped state. + */ + + local->resuming = false; + local->suspended = false; + local->started = false; + + /* scheduled scan clearly can't be running any more, but tell + * cfg80211 and clear local state + */ + ieee80211_sched_scan_end(local); + + list_for_each_entry(sdata, &local->interfaces, list) + sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; + + /* Mark channel contexts as not being in the driver any more to avoid + * removing them from the driver during the shutdown process... + */ + mutex_lock(&local->chanctx_mtx); + list_for_each_entry(ctx, &local->chanctx_list, list) + ctx->driver_present = false; + mutex_unlock(&local->chanctx_mtx); + + cfg80211_shutdown_all_interfaces(local->hw.wiphy); +} + static void ieee80211_assign_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { @@ -1520,9 +1654,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) */ res = drv_start(local); if (res) { - WARN(local->suspended, "Hardware became unavailable " - "upon resume. This could be a software issue " - "prior to suspend or a hardware issue.\n"); + if (local->suspended) + WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n"); + else + WARN(1, "Hardware became unavailable during restart.\n"); + ieee80211_handle_reconfig_failure(local); return res; } @@ -1546,7 +1682,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(local->resuming); res = drv_add_interface(local, sdata); if (WARN_ON(res)) { - rcu_assign_pointer(local->monitor_sdata, NULL); + RCU_INIT_POINTER(local->monitor_sdata, NULL); synchronize_net(); kfree(sdata); } @@ -1563,19 +1699,21 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (local->use_chanctx) { mutex_lock(&local->chanctx_mtx); list_for_each_entry(ctx, &local->chanctx_list, list) - WARN_ON(drv_add_chanctx(local, ctx)); + if (ctx->replace_state != + IEEE80211_CHANCTX_REPLACES_OTHER) + WARN_ON(drv_add_chanctx(local, ctx)); mutex_unlock(&local->chanctx_mtx); - } - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - ieee80211_assign_chanctx(local, sdata); - } + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + ieee80211_assign_chanctx(local, sdata); + } - sdata = rtnl_dereference(local->monitor_sdata); - if (sdata && ieee80211_sdata_running(sdata)) - ieee80211_assign_chanctx(local, sdata); + sdata = rtnl_dereference(local->monitor_sdata); + if (sdata && ieee80211_sdata_running(sdata)) + ieee80211_assign_chanctx(local, sdata); + } /* add STAs back */ mutex_lock(&local->sta_mtx); @@ -1671,13 +1809,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) } break; case NL80211_IFTYPE_WDS: - break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: - /* ignore virtual */ - break; case NL80211_IFTYPE_P2P_DEVICE: - changed = BSS_CHANGED_IDLE; + /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -1760,7 +1895,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_SUSPEND); + IEEE80211_QUEUE_STOP_REASON_SUSPEND, + false); /* * Reconfigure sched scan if it was interrupted by FW restart or @@ -1780,7 +1916,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (sched_scan_stopped) - cfg80211_sched_scan_stopped(local->hw.wiphy); + cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); /* * If this is for hw restart things are still running. @@ -2797,3 +2933,168 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, ps->dtim_count = dtim_count; } + +static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_sub_if_data *sdata; + u8 radar_detect = 0; + + lockdep_assert_held(&local->chanctx_mtx); + + if (WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)) + return 0; + + list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list) + if (sdata->reserved_radar_required) + radar_detect |= BIT(sdata->reserved_chandef.width); + + /* + * An in-place reservation context should not have any assigned vifs + * until it replaces the other context. + */ + WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER && + !list_empty(&ctx->assigned_vifs)); + + list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list) + if (sdata->radar_required) + radar_detect |= BIT(sdata->vif.bss_conf.chandef.width); + + return radar_detect; +} + +int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + enum ieee80211_chanctx_mode chanmode, + u8 radar_detect) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *sdata_iter; + enum nl80211_iftype iftype = sdata->wdev.iftype; + int num[NUM_NL80211_IFTYPES]; + struct ieee80211_chanctx *ctx; + int num_different_channels = 0; + int total = 1; + + lockdep_assert_held(&local->chanctx_mtx); + + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + if (WARN_ON(chandef && chanmode == IEEE80211_CHANCTX_SHARED && + !chandef->chan)) + return -EINVAL; + + if (chandef) + num_different_channels = 1; + + if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) + return -EINVAL; + + /* Always allow software iftypes */ + if (local->hw.wiphy->software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; + return 0; + } + + memset(num, 0, sizeof(num)); + + if (iftype != NL80211_IFTYPE_UNSPECIFIED) + num[iftype] = 1; + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) + continue; + radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); + if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { + num_different_channels++; + continue; + } + if (chandef && chanmode == IEEE80211_CHANCTX_SHARED && + cfg80211_chandef_compatible(chandef, + &ctx->conf.def)) + continue; + num_different_channels++; + } + + list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { + struct wireless_dev *wdev_iter; + + wdev_iter = &sdata_iter->wdev; + + if (sdata_iter == sdata || + rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL || + local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) + continue; + + num[wdev_iter->iftype]++; + total++; + } + + if (total == 1 && !radar_detect) + return 0; + + return cfg80211_check_combinations(local->hw.wiphy, + num_different_channels, + radar_detect, num); +} + +static void +ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c, + void *data) +{ + u32 *max_num_different_channels = data; + + *max_num_different_channels = max(*max_num_different_channels, + c->num_different_channels); +} + +int ieee80211_max_num_channels(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + int num[NUM_NL80211_IFTYPES] = {}; + struct ieee80211_chanctx *ctx; + int num_different_channels = 0; + u8 radar_detect = 0; + u32 max_num_different_channels = 1; + int err; + + lockdep_assert_held(&local->chanctx_mtx); + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) + continue; + + num_different_channels++; + + radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); + } + + list_for_each_entry_rcu(sdata, &local->interfaces, list) + num[sdata->wdev.iftype]++; + + err = cfg80211_iter_combinations(local->hw.wiphy, + num_different_channels, radar_detect, + num, ieee80211_iter_max_chans, + &max_num_different_channels); + if (err < 0) + return err; + + return max_num_different_channels; +} + +u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo) +{ + *buf++ = WLAN_EID_VENDOR_SPECIFIC; + *buf++ = 7; /* len */ + *buf++ = 0x00; /* Microsoft OUI 00:50:F2 */ + *buf++ = 0x50; + *buf++ = 0xf2; + *buf++ = 2; /* WME */ + *buf++ = 0; /* WME info */ + *buf++ = 1; /* WME ver */ + *buf++ = qosinfo; /* U-APSD no in use */ + + return buf; +} diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index e9e36a256165..671ce0d27a80 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -129,10 +129,17 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (!vht_cap_ie || !sband->vht_cap.vht_supported) return; - /* A VHT STA must support 40 MHz */ - if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + /* don't support VHT for TDLS peers for now */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) return; + /* + * A VHT STA must support 40 MHz, but if we verify that here + * then we break a few things - some APs (e.g. Netgear R6300v2 + * and others based on the BCM4360 chipset) will unset this + * capability bit when operating in 20 MHz. + */ + vht_cap->vht_supported = true; own_cap = sband->vht_cap; diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 6ee2b5863572..9181fb6d6437 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -271,22 +271,6 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, return ret; } - -static bool ieee80211_wep_is_weak_iv(struct sk_buff *skb, - struct ieee80211_key *key) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - unsigned int hdrlen; - u8 *ivpos; - u32 iv; - - hdrlen = ieee80211_hdrlen(hdr->frame_control); - ivpos = skb->data + hdrlen; - iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; - - return ieee80211_wep_weak_iv(iv, key->conf.keylen); -} - ieee80211_rx_result ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) { @@ -301,16 +285,12 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) if (!(status->flag & RX_FLAG_DECRYPTED)) { if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; - if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) - rx->sta->wep_weak_iv_count++; if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + IEEE80211_WEP_IV_LEN)) return RX_DROP_UNUSABLE; - if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) - rx->sta->wep_weak_iv_count++; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b8600e3c29c8..f7d4ca4c46e0 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -406,7 +406,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && - !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + !((info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && + ieee80211_is_mgmt(hdr->frame_control))) { /* * hwaccel has no need for preallocated room for CCMP * header or MIC fields @@ -808,7 +811,7 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) ieee80211_rx_result ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx) { - if (rx->sta->cipher_scheme) + if (rx->sta && rx->sta->cipher_scheme) return ieee80211_crypto_cs_decrypt(rx); return RX_DROP_UNUSABLE; diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig index b33dd76d4307..1818a99b3081 100644 --- a/net/mac802154/Kconfig +++ b/net/mac802154/Kconfig @@ -2,6 +2,10 @@ config MAC802154 tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)" depends on IEEE802154 select CRC_CCITT + select CRYPTO_AUTHENC + select CRYPTO_CCM + select CRYPTO_CTR + select CRYPTO_AES ---help--- This option enables the hardware independent IEEE 802.15.4 networking stack for SoftMAC devices (the ones implementing diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile index 15d62df52182..9723d6f3f3e5 100644 --- a/net/mac802154/Makefile +++ b/net/mac802154/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_MAC802154) += mac802154.o -mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o +mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o \ + monitor.o wpan.o llsec.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 2cf66d885e68..b36b2b996578 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -143,6 +143,7 @@ static void mac802154_del_iface(struct wpan_phy *phy, struct net_device *dev) { struct mac802154_sub_if_data *sdata; + ASSERT_RTNL(); sdata = netdev_priv(dev); @@ -166,11 +167,13 @@ mac802154_add_iface(struct wpan_phy *phy, const char *name, int type) switch (type) { case IEEE802154_DEV_MONITOR: dev = alloc_netdev(sizeof(struct mac802154_sub_if_data), - name, mac802154_monitor_setup); + name, NET_NAME_UNKNOWN, + mac802154_monitor_setup); break; case IEEE802154_DEV_WPAN: dev = alloc_netdev(sizeof(struct mac802154_sub_if_data), - name, mac802154_wpan_setup); + name, NET_NAME_UNKNOWN, + mac802154_wpan_setup); break; default: dev = NULL; @@ -276,7 +279,8 @@ ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) } priv = wpan_phy_priv(phy); - priv->hw.phy = priv->phy = phy; + priv->phy = phy; + priv->hw.phy = priv->phy; priv->hw.priv = (char *)priv + ALIGN(sizeof(*priv), NETDEV_ALIGN); priv->ops = ops; @@ -302,29 +306,61 @@ EXPORT_SYMBOL(ieee802154_free_device); int ieee802154_register_device(struct ieee802154_dev *dev) { struct mac802154_priv *priv = mac802154_to_priv(dev); - int rc = -ENOMEM; + int rc = -ENOSYS; + + if (dev->flags & IEEE802154_HW_TXPOWER) { + if (!priv->ops->set_txpower) + goto out; + + priv->phy->set_txpower = mac802154_set_txpower; + } + + if (dev->flags & IEEE802154_HW_LBT) { + if (!priv->ops->set_lbt) + goto out; + + priv->phy->set_lbt = mac802154_set_lbt; + } + + if (dev->flags & IEEE802154_HW_CCA_MODE) { + if (!priv->ops->set_cca_mode) + goto out; + + priv->phy->set_cca_mode = mac802154_set_cca_mode; + } + + if (dev->flags & IEEE802154_HW_CCA_ED_LEVEL) { + if (!priv->ops->set_cca_ed_level) + goto out; + + priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; + } + + if (dev->flags & IEEE802154_HW_CSMA_PARAMS) { + if (!priv->ops->set_csma_params) + goto out; + + priv->phy->set_csma_params = mac802154_set_csma_params; + } + + if (dev->flags & IEEE802154_HW_FRAME_RETRIES) { + if (!priv->ops->set_frame_retries) + goto out; + + priv->phy->set_frame_retries = mac802154_set_frame_retries; + } priv->dev_workqueue = create_singlethread_workqueue(wpan_phy_name(priv->phy)); - if (!priv->dev_workqueue) + if (!priv->dev_workqueue) { + rc = -ENOMEM; goto out; + } wpan_phy_set_dev(priv->phy, priv->hw.parent); priv->phy->add_iface = mac802154_add_iface; priv->phy->del_iface = mac802154_del_iface; - if (priv->ops->set_txpower) - priv->phy->set_txpower = mac802154_set_txpower; - if (priv->ops->set_lbt) - priv->phy->set_lbt = mac802154_set_lbt; - if (priv->ops->set_cca_mode) - priv->phy->set_cca_mode = mac802154_set_cca_mode; - if (priv->ops->set_cca_ed_level) - priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; - if (priv->ops->set_csma_params) - priv->phy->set_csma_params = mac802154_set_csma_params; - if (priv->ops->set_frame_retries) - priv->phy->set_frame_retries = mac802154_set_frame_retries; rc = wpan_phy_register(priv->phy); if (rc < 0) diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c new file mode 100644 index 000000000000..457058142098 --- /dev/null +++ b/net/mac802154/llsec.c @@ -0,0 +1,1071 @@ +/* + * Copyright (C) 2014 Fraunhofer ITWM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Written by: + * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de> + */ + +#include <linux/err.h> +#include <linux/bug.h> +#include <linux/completion.h> +#include <net/ieee802154.h> +#include <crypto/algapi.h> + +#include "mac802154.h" +#include "llsec.h" + +static void llsec_key_put(struct mac802154_llsec_key *key); +static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a, + const struct ieee802154_llsec_key_id *b); + +static void llsec_dev_free(struct mac802154_llsec_device *dev); + +void mac802154_llsec_init(struct mac802154_llsec *sec) +{ + memset(sec, 0, sizeof(*sec)); + + memset(&sec->params.default_key_source, 0xFF, IEEE802154_ADDR_LEN); + + INIT_LIST_HEAD(&sec->table.security_levels); + INIT_LIST_HEAD(&sec->table.devices); + INIT_LIST_HEAD(&sec->table.keys); + hash_init(sec->devices_short); + hash_init(sec->devices_hw); + rwlock_init(&sec->lock); +} + +void mac802154_llsec_destroy(struct mac802154_llsec *sec) +{ + struct ieee802154_llsec_seclevel *sl, *sn; + struct ieee802154_llsec_device *dev, *dn; + struct ieee802154_llsec_key_entry *key, *kn; + + list_for_each_entry_safe(sl, sn, &sec->table.security_levels, list) { + struct mac802154_llsec_seclevel *msl; + + msl = container_of(sl, struct mac802154_llsec_seclevel, level); + list_del(&sl->list); + kfree(msl); + } + + list_for_each_entry_safe(dev, dn, &sec->table.devices, list) { + struct mac802154_llsec_device *mdev; + + mdev = container_of(dev, struct mac802154_llsec_device, dev); + list_del(&dev->list); + llsec_dev_free(mdev); + } + + list_for_each_entry_safe(key, kn, &sec->table.keys, list) { + struct mac802154_llsec_key *mkey; + + mkey = container_of(key->key, struct mac802154_llsec_key, key); + list_del(&key->list); + llsec_key_put(mkey); + kfree(key); + } +} + + + +int mac802154_llsec_get_params(struct mac802154_llsec *sec, + struct ieee802154_llsec_params *params) +{ + read_lock_bh(&sec->lock); + *params = sec->params; + read_unlock_bh(&sec->lock); + + return 0; +} + +int mac802154_llsec_set_params(struct mac802154_llsec *sec, + const struct ieee802154_llsec_params *params, + int changed) +{ + write_lock_bh(&sec->lock); + + if (changed & IEEE802154_LLSEC_PARAM_ENABLED) + sec->params.enabled = params->enabled; + if (changed & IEEE802154_LLSEC_PARAM_FRAME_COUNTER) + sec->params.frame_counter = params->frame_counter; + if (changed & IEEE802154_LLSEC_PARAM_OUT_LEVEL) + sec->params.out_level = params->out_level; + if (changed & IEEE802154_LLSEC_PARAM_OUT_KEY) + sec->params.out_key = params->out_key; + if (changed & IEEE802154_LLSEC_PARAM_KEY_SOURCE) + sec->params.default_key_source = params->default_key_source; + if (changed & IEEE802154_LLSEC_PARAM_PAN_ID) + sec->params.pan_id = params->pan_id; + if (changed & IEEE802154_LLSEC_PARAM_HWADDR) + sec->params.hwaddr = params->hwaddr; + if (changed & IEEE802154_LLSEC_PARAM_COORD_HWADDR) + sec->params.coord_hwaddr = params->coord_hwaddr; + if (changed & IEEE802154_LLSEC_PARAM_COORD_SHORTADDR) + sec->params.coord_shortaddr = params->coord_shortaddr; + + write_unlock_bh(&sec->lock); + + return 0; +} + + + +static struct mac802154_llsec_key* +llsec_key_alloc(const struct ieee802154_llsec_key *template) +{ + const int authsizes[3] = { 4, 8, 16 }; + struct mac802154_llsec_key *key; + int i; + + key = kzalloc(sizeof(*key), GFP_KERNEL); + if (!key) + return NULL; + + kref_init(&key->ref); + key->key = *template; + + BUILD_BUG_ON(ARRAY_SIZE(authsizes) != ARRAY_SIZE(key->tfm)); + + for (i = 0; i < ARRAY_SIZE(key->tfm); i++) { + key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0, + CRYPTO_ALG_ASYNC); + if (!key->tfm[i]) + goto err_tfm; + if (crypto_aead_setkey(key->tfm[i], template->key, + IEEE802154_LLSEC_KEY_SIZE)) + goto err_tfm; + if (crypto_aead_setauthsize(key->tfm[i], authsizes[i])) + goto err_tfm; + } + + key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); + if (!key->tfm0) + goto err_tfm; + + if (crypto_blkcipher_setkey(key->tfm0, template->key, + IEEE802154_LLSEC_KEY_SIZE)) + goto err_tfm0; + + return key; + +err_tfm0: + crypto_free_blkcipher(key->tfm0); +err_tfm: + for (i = 0; i < ARRAY_SIZE(key->tfm); i++) + if (key->tfm[i]) + crypto_free_aead(key->tfm[i]); + + kfree(key); + return NULL; +} + +static void llsec_key_release(struct kref *ref) +{ + struct mac802154_llsec_key *key; + int i; + + key = container_of(ref, struct mac802154_llsec_key, ref); + + for (i = 0; i < ARRAY_SIZE(key->tfm); i++) + crypto_free_aead(key->tfm[i]); + + crypto_free_blkcipher(key->tfm0); + kfree(key); +} + +static struct mac802154_llsec_key* +llsec_key_get(struct mac802154_llsec_key *key) +{ + kref_get(&key->ref); + return key; +} + +static void llsec_key_put(struct mac802154_llsec_key *key) +{ + kref_put(&key->ref, llsec_key_release); +} + +static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a, + const struct ieee802154_llsec_key_id *b) +{ + if (a->mode != b->mode) + return false; + + if (a->mode == IEEE802154_SCF_KEY_IMPLICIT) + return ieee802154_addr_equal(&a->device_addr, &b->device_addr); + + if (a->id != b->id) + return false; + + switch (a->mode) { + case IEEE802154_SCF_KEY_INDEX: + return true; + case IEEE802154_SCF_KEY_SHORT_INDEX: + return a->short_source == b->short_source; + case IEEE802154_SCF_KEY_HW_INDEX: + return a->extended_source == b->extended_source; + } + + return false; +} + +int mac802154_llsec_key_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key) +{ + struct mac802154_llsec_key *mkey = NULL; + struct ieee802154_llsec_key_entry *pos, *new; + + if (!(key->frame_types & (1 << IEEE802154_FC_TYPE_MAC_CMD)) && + key->cmd_frame_ids) + return -EINVAL; + + list_for_each_entry(pos, &sec->table.keys, list) { + if (llsec_key_id_equal(&pos->id, id)) + return -EEXIST; + + if (memcmp(pos->key->key, key->key, + IEEE802154_LLSEC_KEY_SIZE)) + continue; + + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + /* Don't allow multiple instances of the same AES key to have + * different allowed frame types/command frame ids, as this is + * not possible in the 802.15.4 PIB. + */ + if (pos->key->frame_types != key->frame_types || + pos->key->cmd_frame_ids != key->cmd_frame_ids) + return -EEXIST; + + break; + } + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + + if (!mkey) + mkey = llsec_key_alloc(key); + else + mkey = llsec_key_get(mkey); + + if (!mkey) + goto fail; + + new->id = *id; + new->key = &mkey->key; + + list_add_rcu(&new->list, &sec->table.keys); + + return 0; + +fail: + kfree(new); + return -ENOMEM; +} + +int mac802154_llsec_key_del(struct mac802154_llsec *sec, + const struct ieee802154_llsec_key_id *key) +{ + struct ieee802154_llsec_key_entry *pos; + + list_for_each_entry(pos, &sec->table.keys, list) { + struct mac802154_llsec_key *mkey; + + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + if (llsec_key_id_equal(&pos->id, key)) { + list_del_rcu(&pos->list); + llsec_key_put(mkey); + return 0; + } + } + + return -ENOENT; +} + + + +static bool llsec_dev_use_shortaddr(__le16 short_addr) +{ + return short_addr != cpu_to_le16(IEEE802154_ADDR_UNDEF) && + short_addr != cpu_to_le16(0xffff); +} + +static u32 llsec_dev_hash_short(__le16 short_addr, __le16 pan_id) +{ + return ((__force u16) short_addr) << 16 | (__force u16) pan_id; +} + +static u64 llsec_dev_hash_long(__le64 hwaddr) +{ + return (__force u64) hwaddr; +} + +static struct mac802154_llsec_device* +llsec_dev_find_short(struct mac802154_llsec *sec, __le16 short_addr, + __le16 pan_id) +{ + struct mac802154_llsec_device *dev; + u32 key = llsec_dev_hash_short(short_addr, pan_id); + + hash_for_each_possible_rcu(sec->devices_short, dev, bucket_s, key) { + if (dev->dev.short_addr == short_addr && + dev->dev.pan_id == pan_id) + return dev; + } + + return NULL; +} + +static struct mac802154_llsec_device* +llsec_dev_find_long(struct mac802154_llsec *sec, __le64 hwaddr) +{ + struct mac802154_llsec_device *dev; + u64 key = llsec_dev_hash_long(hwaddr); + + hash_for_each_possible_rcu(sec->devices_hw, dev, bucket_hw, key) { + if (dev->dev.hwaddr == hwaddr) + return dev; + } + + return NULL; +} + +static void llsec_dev_free(struct mac802154_llsec_device *dev) +{ + struct ieee802154_llsec_device_key *pos, *pn; + struct mac802154_llsec_device_key *devkey; + + list_for_each_entry_safe(pos, pn, &dev->dev.keys, list) { + devkey = container_of(pos, struct mac802154_llsec_device_key, + devkey); + + list_del(&pos->list); + kfree(devkey); + } + + kfree(dev); +} + +int mac802154_llsec_dev_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_device *dev) +{ + struct mac802154_llsec_device *entry; + u32 skey = llsec_dev_hash_short(dev->short_addr, dev->pan_id); + u64 hwkey = llsec_dev_hash_long(dev->hwaddr); + + BUILD_BUG_ON(sizeof(hwkey) != IEEE802154_ADDR_LEN); + + if ((llsec_dev_use_shortaddr(dev->short_addr) && + llsec_dev_find_short(sec, dev->short_addr, dev->pan_id)) || + llsec_dev_find_long(sec, dev->hwaddr)) + return -EEXIST; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->dev = *dev; + spin_lock_init(&entry->lock); + INIT_LIST_HEAD(&entry->dev.keys); + + if (llsec_dev_use_shortaddr(dev->short_addr)) + hash_add_rcu(sec->devices_short, &entry->bucket_s, skey); + else + INIT_HLIST_NODE(&entry->bucket_s); + + hash_add_rcu(sec->devices_hw, &entry->bucket_hw, hwkey); + list_add_tail_rcu(&entry->dev.list, &sec->table.devices); + + return 0; +} + +static void llsec_dev_free_rcu(struct rcu_head *rcu) +{ + llsec_dev_free(container_of(rcu, struct mac802154_llsec_device, rcu)); +} + +int mac802154_llsec_dev_del(struct mac802154_llsec *sec, __le64 device_addr) +{ + struct mac802154_llsec_device *pos; + + pos = llsec_dev_find_long(sec, device_addr); + if (!pos) + return -ENOENT; + + hash_del_rcu(&pos->bucket_s); + hash_del_rcu(&pos->bucket_hw); + call_rcu(&pos->rcu, llsec_dev_free_rcu); + + return 0; +} + + + +static struct mac802154_llsec_device_key* +llsec_devkey_find(struct mac802154_llsec_device *dev, + const struct ieee802154_llsec_key_id *key) +{ + struct ieee802154_llsec_device_key *devkey; + + list_for_each_entry_rcu(devkey, &dev->dev.keys, list) { + if (!llsec_key_id_equal(key, &devkey->key_id)) + continue; + + return container_of(devkey, struct mac802154_llsec_device_key, + devkey); + } + + return NULL; +} + +int mac802154_llsec_devkey_add(struct mac802154_llsec *sec, + __le64 dev_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_llsec_device *dev; + struct mac802154_llsec_device_key *devkey; + + dev = llsec_dev_find_long(sec, dev_addr); + + if (!dev) + return -ENOENT; + + if (llsec_devkey_find(dev, &key->key_id)) + return -EEXIST; + + devkey = kmalloc(sizeof(*devkey), GFP_KERNEL); + if (!devkey) + return -ENOMEM; + + devkey->devkey = *key; + list_add_tail_rcu(&devkey->devkey.list, &dev->dev.keys); + return 0; +} + +int mac802154_llsec_devkey_del(struct mac802154_llsec *sec, + __le64 dev_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_llsec_device *dev; + struct mac802154_llsec_device_key *devkey; + + dev = llsec_dev_find_long(sec, dev_addr); + + if (!dev) + return -ENOENT; + + devkey = llsec_devkey_find(dev, &key->key_id); + if (!devkey) + return -ENOENT; + + list_del_rcu(&devkey->devkey.list); + kfree_rcu(devkey, rcu); + return 0; +} + + + +static struct mac802154_llsec_seclevel* +llsec_find_seclevel(const struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl) +{ + struct ieee802154_llsec_seclevel *pos; + + list_for_each_entry(pos, &sec->table.security_levels, list) { + if (pos->frame_type != sl->frame_type || + (pos->frame_type == IEEE802154_FC_TYPE_MAC_CMD && + pos->cmd_frame_id != sl->cmd_frame_id) || + pos->device_override != sl->device_override || + pos->sec_levels != sl->sec_levels) + continue; + + return container_of(pos, struct mac802154_llsec_seclevel, + level); + } + + return NULL; +} + +int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_llsec_seclevel *entry; + + if (llsec_find_seclevel(sec, sl)) + return -EEXIST; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->level = *sl; + + list_add_tail_rcu(&entry->level.list, &sec->table.security_levels); + + return 0; +} + +int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_llsec_seclevel *pos; + + pos = llsec_find_seclevel(sec, sl); + if (!pos) + return -ENOENT; + + list_del_rcu(&pos->level.list); + kfree_rcu(pos, rcu); + + return 0; +} + + + +static int llsec_recover_addr(struct mac802154_llsec *sec, + struct ieee802154_addr *addr) +{ + __le16 caddr = sec->params.coord_shortaddr; + + addr->pan_id = sec->params.pan_id; + + if (caddr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { + return -EINVAL; + } else if (caddr == cpu_to_le16(IEEE802154_ADDR_UNDEF)) { + addr->extended_addr = sec->params.coord_hwaddr; + addr->mode = IEEE802154_ADDR_LONG; + } else { + addr->short_addr = sec->params.coord_shortaddr; + addr->mode = IEEE802154_ADDR_SHORT; + } + + return 0; +} + +static struct mac802154_llsec_key* +llsec_lookup_key(struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + const struct ieee802154_addr *addr, + struct ieee802154_llsec_key_id *key_id) +{ + struct ieee802154_addr devaddr = *addr; + u8 key_id_mode = hdr->sec.key_id_mode; + struct ieee802154_llsec_key_entry *key_entry; + struct mac802154_llsec_key *key; + + if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT && + devaddr.mode == IEEE802154_ADDR_NONE) { + if (hdr->fc.type == IEEE802154_FC_TYPE_BEACON) { + devaddr.extended_addr = sec->params.coord_hwaddr; + devaddr.mode = IEEE802154_ADDR_LONG; + } else if (llsec_recover_addr(sec, &devaddr) < 0) { + return NULL; + } + } + + list_for_each_entry_rcu(key_entry, &sec->table.keys, list) { + const struct ieee802154_llsec_key_id *id = &key_entry->id; + + if (!(key_entry->key->frame_types & BIT(hdr->fc.type))) + continue; + + if (id->mode != key_id_mode) + continue; + + if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT) { + if (ieee802154_addr_equal(&devaddr, &id->device_addr)) + goto found; + } else { + if (id->id != hdr->sec.key_id) + continue; + + if ((key_id_mode == IEEE802154_SCF_KEY_INDEX) || + (key_id_mode == IEEE802154_SCF_KEY_SHORT_INDEX && + id->short_source == hdr->sec.short_src) || + (key_id_mode == IEEE802154_SCF_KEY_HW_INDEX && + id->extended_source == hdr->sec.extended_src)) + goto found; + } + } + + return NULL; + +found: + key = container_of(key_entry->key, struct mac802154_llsec_key, key); + if (key_id) + *key_id = key_entry->id; + return llsec_key_get(key); +} + + +static void llsec_geniv(u8 iv[16], __le64 addr, + const struct ieee802154_sechdr *sec) +{ + __be64 addr_bytes = (__force __be64) swab64((__force u64) addr); + __be32 frame_counter = (__force __be32) swab32((__force u32) sec->frame_counter); + + iv[0] = 1; /* L' = L - 1 = 1 */ + memcpy(iv + 1, &addr_bytes, sizeof(addr_bytes)); + memcpy(iv + 9, &frame_counter, sizeof(frame_counter)); + iv[13] = sec->level; + iv[14] = 0; + iv[15] = 1; +} + +static int +llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key) +{ + u8 iv[16]; + struct scatterlist src; + struct blkcipher_desc req = { + .tfm = key->tfm0, + .info = iv, + .flags = 0, + }; + + llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); + sg_init_one(&src, skb->data, skb->len); + return crypto_blkcipher_encrypt_iv(&req, &src, &src, skb->len); +} + +static struct crypto_aead* +llsec_tfm_by_len(struct mac802154_llsec_key *key, int authlen) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(key->tfm); i++) + if (crypto_aead_authsize(key->tfm[i]) == authlen) + return key->tfm[i]; + + BUG(); +} + +static int +llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key) +{ + u8 iv[16]; + unsigned char *data; + int authlen, assoclen, datalen, rc; + struct scatterlist src, assoc[2], dst[2]; + struct aead_request *req; + + authlen = ieee802154_sechdr_authtag_len(&hdr->sec); + llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); + + req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC); + if (!req) + return -ENOMEM; + + sg_init_table(assoc, 2); + sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len); + assoclen = skb->mac_len; + + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + + if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) { + sg_set_buf(&assoc[1], data, 0); + } else { + sg_set_buf(&assoc[1], data, datalen); + assoclen += datalen; + datalen = 0; + } + + sg_init_one(&src, data, datalen); + + sg_init_table(dst, 2); + sg_set_buf(&dst[0], data, datalen); + sg_set_buf(&dst[1], skb_put(skb, authlen), authlen); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_assoc(req, assoc, assoclen); + aead_request_set_crypt(req, &src, dst, datalen, iv); + + rc = crypto_aead_encrypt(req); + + kfree(req); + + return rc; +} + +static int llsec_do_encrypt(struct sk_buff *skb, + const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key) +{ + if (hdr->sec.level == IEEE802154_SCF_SECLEVEL_ENC) + return llsec_do_encrypt_unauth(skb, sec, hdr, key); + else + return llsec_do_encrypt_auth(skb, sec, hdr, key); +} + +int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb) +{ + struct ieee802154_hdr hdr; + int rc, authlen, hlen; + struct mac802154_llsec_key *key; + u32 frame_ctr; + + hlen = ieee802154_hdr_pull(skb, &hdr); + + if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA) + return -EINVAL; + + if (!hdr.fc.security_enabled || hdr.sec.level == 0) { + skb_push(skb, hlen); + return 0; + } + + authlen = ieee802154_sechdr_authtag_len(&hdr.sec); + + if (skb->len + hlen + authlen + IEEE802154_MFR_SIZE > IEEE802154_MTU) + return -EMSGSIZE; + + rcu_read_lock(); + + read_lock_bh(&sec->lock); + + if (!sec->params.enabled) { + rc = -EINVAL; + goto fail_read; + } + + key = llsec_lookup_key(sec, &hdr, &hdr.dest, NULL); + if (!key) { + rc = -ENOKEY; + goto fail_read; + } + + read_unlock_bh(&sec->lock); + + write_lock_bh(&sec->lock); + + frame_ctr = be32_to_cpu(sec->params.frame_counter); + hdr.sec.frame_counter = cpu_to_le32(frame_ctr); + if (frame_ctr == 0xFFFFFFFF) { + write_unlock_bh(&sec->lock); + llsec_key_put(key); + rc = -EOVERFLOW; + goto fail; + } + + sec->params.frame_counter = cpu_to_be32(frame_ctr + 1); + + write_unlock_bh(&sec->lock); + + rcu_read_unlock(); + + skb->mac_len = ieee802154_hdr_push(skb, &hdr); + skb_reset_mac_header(skb); + + rc = llsec_do_encrypt(skb, sec, &hdr, key); + llsec_key_put(key); + + return rc; + +fail_read: + read_unlock_bh(&sec->lock); +fail: + rcu_read_unlock(); + return rc; +} + + + +static struct mac802154_llsec_device* +llsec_lookup_dev(struct mac802154_llsec *sec, + const struct ieee802154_addr *addr) +{ + struct ieee802154_addr devaddr = *addr; + struct mac802154_llsec_device *dev = NULL; + + if (devaddr.mode == IEEE802154_ADDR_NONE && + llsec_recover_addr(sec, &devaddr) < 0) + return NULL; + + if (devaddr.mode == IEEE802154_ADDR_SHORT) { + u32 key = llsec_dev_hash_short(devaddr.short_addr, + devaddr.pan_id); + + hash_for_each_possible_rcu(sec->devices_short, dev, + bucket_s, key) { + if (dev->dev.pan_id == devaddr.pan_id && + dev->dev.short_addr == devaddr.short_addr) + return dev; + } + } else { + u64 key = llsec_dev_hash_long(devaddr.extended_addr); + + hash_for_each_possible_rcu(sec->devices_hw, dev, + bucket_hw, key) { + if (dev->dev.hwaddr == devaddr.extended_addr) + return dev; + } + } + + return NULL; +} + +static int +llsec_lookup_seclevel(const struct mac802154_llsec *sec, + u8 frame_type, u8 cmd_frame_id, + struct ieee802154_llsec_seclevel *rlevel) +{ + struct ieee802154_llsec_seclevel *level; + + list_for_each_entry_rcu(level, &sec->table.security_levels, list) { + if (level->frame_type == frame_type && + (frame_type != IEEE802154_FC_TYPE_MAC_CMD || + level->cmd_frame_id == cmd_frame_id)) { + *rlevel = *level; + return 0; + } + } + + return -EINVAL; +} + +static int +llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key, __le64 dev_addr) +{ + u8 iv[16]; + unsigned char *data; + int datalen; + struct scatterlist src; + struct blkcipher_desc req = { + .tfm = key->tfm0, + .info = iv, + .flags = 0, + }; + + llsec_geniv(iv, dev_addr, &hdr->sec); + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + + sg_init_one(&src, data, datalen); + + return crypto_blkcipher_decrypt_iv(&req, &src, &src, datalen); +} + +static int +llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key, __le64 dev_addr) +{ + u8 iv[16]; + unsigned char *data; + int authlen, datalen, assoclen, rc; + struct scatterlist src, assoc[2]; + struct aead_request *req; + + authlen = ieee802154_sechdr_authtag_len(&hdr->sec); + llsec_geniv(iv, dev_addr, &hdr->sec); + + req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC); + if (!req) + return -ENOMEM; + + sg_init_table(assoc, 2); + sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len); + assoclen = skb->mac_len; + + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + + if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) { + sg_set_buf(&assoc[1], data, 0); + } else { + sg_set_buf(&assoc[1], data, datalen - authlen); + assoclen += datalen - authlen; + data += datalen - authlen; + datalen = authlen; + } + + sg_init_one(&src, data, datalen); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_assoc(req, assoc, assoclen); + aead_request_set_crypt(req, &src, &src, datalen, iv); + + rc = crypto_aead_decrypt(req); + + kfree(req); + skb_trim(skb, skb->len - authlen); + + return rc; +} + +static int +llsec_do_decrypt(struct sk_buff *skb, const struct mac802154_llsec *sec, + const struct ieee802154_hdr *hdr, + struct mac802154_llsec_key *key, __le64 dev_addr) +{ + if (hdr->sec.level == IEEE802154_SCF_SECLEVEL_ENC) + return llsec_do_decrypt_unauth(skb, sec, hdr, key, dev_addr); + else + return llsec_do_decrypt_auth(skb, sec, hdr, key, dev_addr); +} + +static int +llsec_update_devkey_record(struct mac802154_llsec_device *dev, + const struct ieee802154_llsec_key_id *in_key) +{ + struct mac802154_llsec_device_key *devkey; + + devkey = llsec_devkey_find(dev, in_key); + + if (!devkey) { + struct mac802154_llsec_device_key *next; + + next = kzalloc(sizeof(*devkey), GFP_ATOMIC); + if (!next) + return -ENOMEM; + + next->devkey.key_id = *in_key; + + spin_lock_bh(&dev->lock); + + devkey = llsec_devkey_find(dev, in_key); + if (!devkey) + list_add_rcu(&next->devkey.list, &dev->dev.keys); + else + kfree(next); + + spin_unlock_bh(&dev->lock); + } + + return 0; +} + +static int +llsec_update_devkey_info(struct mac802154_llsec_device *dev, + const struct ieee802154_llsec_key_id *in_key, + u32 frame_counter) +{ + struct mac802154_llsec_device_key *devkey = NULL; + + if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RESTRICT) { + devkey = llsec_devkey_find(dev, in_key); + if (!devkey) + return -ENOENT; + } + + if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RECORD) { + int rc = llsec_update_devkey_record(dev, in_key); + + if (rc < 0) + return rc; + } + + spin_lock_bh(&dev->lock); + + if ((!devkey && frame_counter < dev->dev.frame_counter) || + (devkey && frame_counter < devkey->devkey.frame_counter)) { + spin_unlock_bh(&dev->lock); + return -EINVAL; + } + + if (devkey) + devkey->devkey.frame_counter = frame_counter + 1; + else + dev->dev.frame_counter = frame_counter + 1; + + spin_unlock_bh(&dev->lock); + + return 0; +} + +int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb) +{ + struct ieee802154_hdr hdr; + struct mac802154_llsec_key *key; + struct ieee802154_llsec_key_id key_id; + struct mac802154_llsec_device *dev; + struct ieee802154_llsec_seclevel seclevel; + int err; + __le64 dev_addr; + u32 frame_ctr; + + if (ieee802154_hdr_peek(skb, &hdr) < 0) + return -EINVAL; + if (!hdr.fc.security_enabled) + return 0; + if (hdr.fc.version == 0) + return -EINVAL; + + read_lock_bh(&sec->lock); + if (!sec->params.enabled) { + read_unlock_bh(&sec->lock); + return -EINVAL; + } + read_unlock_bh(&sec->lock); + + rcu_read_lock(); + + key = llsec_lookup_key(sec, &hdr, &hdr.source, &key_id); + if (!key) { + err = -ENOKEY; + goto fail; + } + + dev = llsec_lookup_dev(sec, &hdr.source); + if (!dev) { + err = -EINVAL; + goto fail_dev; + } + + if (llsec_lookup_seclevel(sec, hdr.fc.type, 0, &seclevel) < 0) { + err = -EINVAL; + goto fail_dev; + } + + if (!(seclevel.sec_levels & BIT(hdr.sec.level)) && + (hdr.sec.level == 0 && seclevel.device_override && + !dev->dev.seclevel_exempt)) { + err = -EINVAL; + goto fail_dev; + } + + frame_ctr = le32_to_cpu(hdr.sec.frame_counter); + + if (frame_ctr == 0xffffffff) { + err = -EOVERFLOW; + goto fail_dev; + } + + err = llsec_update_devkey_info(dev, &key_id, frame_ctr); + if (err) + goto fail_dev; + + dev_addr = dev->dev.hwaddr; + + rcu_read_unlock(); + + err = llsec_do_decrypt(skb, sec, &hdr, key, dev_addr); + llsec_key_put(key); + return err; + +fail_dev: + llsec_key_put(key); +fail: + rcu_read_unlock(); + return err; +} diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h new file mode 100644 index 000000000000..950578e1d7be --- /dev/null +++ b/net/mac802154/llsec.h @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2014 Fraunhofer ITWM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Written by: + * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de> + */ + +#ifndef MAC802154_LLSEC_H +#define MAC802154_LLSEC_H + +#include <linux/slab.h> +#include <linux/hashtable.h> +#include <linux/crypto.h> +#include <linux/kref.h> +#include <linux/spinlock.h> +#include <net/af_ieee802154.h> +#include <net/ieee802154_netdev.h> + +struct mac802154_llsec_key { + struct ieee802154_llsec_key key; + + /* one tfm for each authsize (4/8/16) */ + struct crypto_aead *tfm[3]; + struct crypto_blkcipher *tfm0; + + struct kref ref; +}; + +struct mac802154_llsec_device_key { + struct ieee802154_llsec_device_key devkey; + + struct rcu_head rcu; +}; + +struct mac802154_llsec_device { + struct ieee802154_llsec_device dev; + + struct hlist_node bucket_s; + struct hlist_node bucket_hw; + + /* protects dev.frame_counter and the elements of dev.keys */ + spinlock_t lock; + + struct rcu_head rcu; +}; + +struct mac802154_llsec_seclevel { + struct ieee802154_llsec_seclevel level; + + struct rcu_head rcu; +}; + +struct mac802154_llsec { + struct ieee802154_llsec_params params; + struct ieee802154_llsec_table table; + + DECLARE_HASHTABLE(devices_short, 6); + DECLARE_HASHTABLE(devices_hw, 6); + + /* protects params, all other fields are fine with RCU */ + rwlock_t lock; +}; + +void mac802154_llsec_init(struct mac802154_llsec *sec); +void mac802154_llsec_destroy(struct mac802154_llsec *sec); + +int mac802154_llsec_get_params(struct mac802154_llsec *sec, + struct ieee802154_llsec_params *params); +int mac802154_llsec_set_params(struct mac802154_llsec *sec, + const struct ieee802154_llsec_params *params, + int changed); + +int mac802154_llsec_key_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key); +int mac802154_llsec_key_del(struct mac802154_llsec *sec, + const struct ieee802154_llsec_key_id *key); + +int mac802154_llsec_dev_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_device *dev); +int mac802154_llsec_dev_del(struct mac802154_llsec *sec, + __le64 device_addr); + +int mac802154_llsec_devkey_add(struct mac802154_llsec *sec, + __le64 dev_addr, + const struct ieee802154_llsec_device_key *key); +int mac802154_llsec_devkey_del(struct mac802154_llsec *sec, + __le64 dev_addr, + const struct ieee802154_llsec_device_key *key); + +int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl); +int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec, + const struct ieee802154_llsec_seclevel *sl); + +int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb); +int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb); + +#endif /* MAC802154_LLSEC_H */ diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index 28ef59c566e6..762a6f849c6b 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -23,8 +23,12 @@ #ifndef MAC802154_H #define MAC802154_H +#include <linux/mutex.h> +#include <net/mac802154.h> #include <net/ieee802154_netdev.h> +#include "llsec.h" + /* mac802154 device private data */ struct mac802154_priv { struct ieee802154_dev hw; @@ -90,6 +94,13 @@ struct mac802154_sub_if_data { u8 bsn; /* MAC DSN field */ u8 dsn; + + /* protects sec from concurrent access by netlink. access by + * encrypt/decrypt/header_create safe without additional protection. + */ + struct mutex sec_mtx; + + struct mac802154_llsec sec; }; #define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw) @@ -125,4 +136,37 @@ int mac802154_set_mac_params(struct net_device *dev, void mac802154_get_mac_params(struct net_device *dev, struct ieee802154_mac_params *params); +int mac802154_get_params(struct net_device *dev, + struct ieee802154_llsec_params *params); +int mac802154_set_params(struct net_device *dev, + const struct ieee802154_llsec_params *params, + int changed); + +int mac802154_add_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key); +int mac802154_del_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id); + +int mac802154_add_dev(struct net_device *dev, + const struct ieee802154_llsec_device *llsec_dev); +int mac802154_del_dev(struct net_device *dev, __le64 dev_addr); + +int mac802154_add_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key); +int mac802154_del_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key); + +int mac802154_add_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl); +int mac802154_del_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl); + +void mac802154_lock_table(struct net_device *dev); +void mac802154_get_table(struct net_device *dev, + struct ieee802154_llsec_table **t); +void mac802154_unlock_table(struct net_device *dev); + #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index d40c0928bc62..bf809131eef7 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -40,6 +40,9 @@ static int mac802154_mlme_start_req(struct net_device *dev, u8 pan_coord, u8 blx, u8 coord_realign) { + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + int rc = 0; + BUG_ON(addr->mode != IEEE802154_ADDR_SHORT); mac802154_dev_set_pan_id(dev, addr->pan_id); @@ -47,12 +50,31 @@ static int mac802154_mlme_start_req(struct net_device *dev, mac802154_dev_set_ieee_addr(dev); mac802154_dev_set_page_channel(dev, page, channel); + if (ops->llsec) { + struct ieee802154_llsec_params params; + int changed = 0; + + params.coord_shortaddr = addr->short_addr; + changed |= IEEE802154_LLSEC_PARAM_COORD_SHORTADDR; + + params.pan_id = addr->pan_id; + changed |= IEEE802154_LLSEC_PARAM_PAN_ID; + + params.hwaddr = ieee802154_devaddr_from_raw(dev->dev_addr); + changed |= IEEE802154_LLSEC_PARAM_HWADDR; + + params.coord_hwaddr = params.hwaddr; + changed |= IEEE802154_LLSEC_PARAM_COORD_HWADDR; + + rc = ops->llsec->set_params(dev, ¶ms, changed); + } + /* FIXME: add validation for unused parameters to be sane * for SoftMAC */ ieee802154_nl_start_confirm(dev, IEEE802154_SUCCESS); - return 0; + return rc; } static struct wpan_phy *mac802154_get_phy(const struct net_device *dev) @@ -64,6 +86,22 @@ static struct wpan_phy *mac802154_get_phy(const struct net_device *dev) return to_phy(get_device(&priv->hw->phy->dev)); } +static struct ieee802154_llsec_ops mac802154_llsec_ops = { + .get_params = mac802154_get_params, + .set_params = mac802154_set_params, + .add_key = mac802154_add_key, + .del_key = mac802154_del_key, + .add_dev = mac802154_add_dev, + .del_dev = mac802154_del_dev, + .add_devkey = mac802154_add_devkey, + .del_devkey = mac802154_del_devkey, + .add_seclevel = mac802154_add_seclevel, + .del_seclevel = mac802154_del_seclevel, + .lock_table = mac802154_lock_table, + .get_table = mac802154_get_table, + .unlock_table = mac802154_unlock_table, +}; + struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = { .get_phy = mac802154_get_phy, }; @@ -75,6 +113,8 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = { .get_short_addr = mac802154_dev_get_short_addr, .get_dsn = mac802154_dev_get_dsn, + .llsec = &mac802154_llsec_ops, + .set_mac_params = mac802154_set_mac_params, .get_mac_params = mac802154_get_mac_params, }; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index f0991f2344d4..868a040fd422 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -175,9 +175,9 @@ static void phy_chan_notify(struct work_struct *work) mutex_lock(&priv->hw->phy->pib_lock); res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan); - if (res) + if (res) { pr_debug("set_channel failed\n"); - else { + } else { priv->hw->phy->current_channel = priv->chan; priv->hw->phy->current_page = priv->page; } @@ -210,6 +210,194 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) INIT_WORK(&work->work, phy_chan_notify); work->dev = dev; queue_work(priv->hw->dev_workqueue, &work->work); - } else + } else { mutex_unlock(&priv->hw->phy->pib_lock); + } +} + + +int mac802154_get_params(struct net_device *dev, + struct ieee802154_llsec_params *params) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_get_params(&priv->sec, params); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_set_params(struct net_device *dev, + const struct ieee802154_llsec_params *params, + int changed) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_set_params(&priv->sec, params, changed); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_key_add(&priv->sec, id, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_key(struct net_device *dev, + const struct ieee802154_llsec_key_id *id) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_key_del(&priv->sec, id); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_dev(struct net_device *dev, + const struct ieee802154_llsec_device *llsec_dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_dev_add(&priv->sec, llsec_dev); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_dev(struct net_device *dev, __le64 dev_addr) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_dev_del(&priv->sec, dev_addr); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_devkey_add(&priv->sec, device_addr, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_devkey(struct net_device *dev, + __le64 device_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_devkey_del(&priv->sec, device_addr, key); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +int mac802154_add_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_seclevel_add(&priv->sec, sl); + mutex_unlock(&priv->sec_mtx); + + return res; +} + +int mac802154_del_seclevel(struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int res; + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); + res = mac802154_llsec_seclevel_del(&priv->sec, sl); + mutex_unlock(&priv->sec_mtx); + + return res; +} + + +void mac802154_lock_table(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_lock(&priv->sec_mtx); +} + +void mac802154_get_table(struct net_device *dev, + struct ieee802154_llsec_table **t) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + *t = &priv->sec.table; +} + +void mac802154_unlock_table(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + mutex_unlock(&priv->sec_mtx); } diff --git a/net/mac802154/monitor.c b/net/mac802154/monitor.c index 434a26f76a80..a68230e2b25f 100644 --- a/net/mac802154/monitor.c +++ b/net/mac802154/monitor.c @@ -70,7 +70,8 @@ void mac802154_monitors_rx(struct mac802154_priv *priv, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(sdata, &priv->slaves, list) { - if (sdata->type != IEEE802154_DEV_MONITOR) + if (sdata->type != IEEE802154_DEV_MONITOR || + !netif_running(sdata->dev)) continue; skb2 = skb_clone(skb, GFP_ATOMIC); diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 03855b0677cc..7f820a108a9c 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -59,27 +59,28 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi) skb->protocol = htons(ETH_P_IEEE802154); skb_reset_mac_header(skb); - BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb)); - if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) { u16 crc; if (skb->len < 2) { pr_debug("got invalid frame\n"); - goto out; + goto fail; } crc = crc_ccitt(0, skb->data, skb->len); if (crc) { pr_debug("CRC mismatch\n"); - goto out; + goto fail; } skb_trim(skb, skb->len - 2); /* CRC */ } mac802154_monitors_rx(priv, skb); mac802154_wpans_rx(priv, skb); -out: - dev_kfree_skb(skb); + + return; + +fail: + kfree_skb(skb); } static void mac802154_rx_worker(struct work_struct *work) diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 6d1647399d4f..8124353646ae 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -98,6 +98,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) { u16 crc = crc_ccitt(0, skb->data, skb->len); u8 *data = skb_put(skb, 2); + data[0] = crc & 0xff; data[1] = crc >> 8; } diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index 1df7a6a57386..3c3069fd6971 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -35,6 +35,28 @@ #include "mac802154.h" +static int mac802154_wpan_update_llsec(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); + int rc = 0; + + if (ops->llsec) { + struct ieee802154_llsec_params params; + int changed = 0; + + params.pan_id = priv->pan_id; + changed |= IEEE802154_LLSEC_PARAM_PAN_ID; + + params.hwaddr = priv->extended_addr; + changed |= IEEE802154_LLSEC_PARAM_HWADDR; + + rc = ops->llsec->set_params(dev, ¶ms, changed); + } + + return rc; +} + static int mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -81,7 +103,7 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) priv->pan_id = cpu_to_le16(sa->addr.pan_id); priv->short_addr = cpu_to_le16(sa->addr.short_addr); - err = 0; + err = mac802154_wpan_update_llsec(dev); break; } @@ -99,7 +121,7 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) /* FIXME: validate addr */ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); mac802154_dev_set_ieee_addr(dev); - return 0; + return mac802154_wpan_update_llsec(dev); } int mac802154_set_mac_params(struct net_device *dev, @@ -124,7 +146,7 @@ void mac802154_get_mac_params(struct net_device *dev, mutex_unlock(&priv->hw->slaves_mtx); } -int mac802154_wpan_open(struct net_device *dev) +static int mac802154_wpan_open(struct net_device *dev) { int rc; struct mac802154_sub_if_data *priv = netdev_priv(dev); @@ -183,6 +205,38 @@ out: return rc; } +static int mac802154_set_header_security(struct mac802154_sub_if_data *priv, + struct ieee802154_hdr *hdr, + const struct ieee802154_mac_cb *cb) +{ + struct ieee802154_llsec_params params; + u8 level; + + mac802154_llsec_get_params(&priv->sec, ¶ms); + + if (!params.enabled && cb->secen_override && cb->secen) + return -EINVAL; + if (!params.enabled || + (cb->secen_override && !cb->secen) || + !params.out_level) + return 0; + if (cb->seclevel_override && !cb->seclevel) + return -EINVAL; + + level = cb->seclevel_override ? cb->seclevel : params.out_level; + + hdr->fc.security_enabled = 1; + hdr->sec.level = level; + hdr->sec.key_id_mode = params.out_key.mode; + if (params.out_key.mode == IEEE802154_SCF_KEY_SHORT_INDEX) + hdr->sec.short_src = params.out_key.short_source; + else if (params.out_key.mode == IEEE802154_SCF_KEY_HW_INDEX) + hdr->sec.extended_src = params.out_key.extended_source; + hdr->sec.key_id = params.out_key.id; + + return 0; +} + static int mac802154_header_create(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -192,15 +246,20 @@ static int mac802154_header_create(struct sk_buff *skb, { struct ieee802154_hdr hdr; struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct ieee802154_mac_cb *cb = mac_cb(skb); int hlen; if (!daddr) return -EINVAL; memset(&hdr.fc, 0, sizeof(hdr.fc)); - hdr.fc.type = mac_cb_type(skb); - hdr.fc.security_enabled = mac_cb_is_secen(skb); - hdr.fc.ack_request = mac_cb_is_ackreq(skb); + hdr.fc.type = cb->type; + hdr.fc.security_enabled = cb->secen; + hdr.fc.ack_request = cb->ackreq; + hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + + if (mac802154_set_header_security(priv, &hdr, cb) < 0) + return -EINVAL; if (!saddr) { spin_lock_bh(&priv->mib_lock); @@ -231,7 +290,7 @@ static int mac802154_header_create(struct sk_buff *skb, skb_reset_mac_header(skb); skb->mac_len = hlen; - if (hlen + len + 2 > dev->mtu) + if (len > ieee802154_max_payload(&hdr)) return -EMSGSIZE; return hlen; @@ -257,6 +316,7 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev) { struct mac802154_sub_if_data *priv; u8 chan, page; + int rc; priv = netdev_priv(dev); @@ -272,6 +332,13 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + rc = mac802154_llsec_encrypt(&priv->sec, skb); + if (rc) { + pr_warn("encryption failed: %i\n", rc); + kfree_skb(skb); + return NETDEV_TX_OK; + } + skb->skb_iif = dev->ifindex; dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; @@ -292,6 +359,15 @@ static const struct net_device_ops mac802154_wpan_ops = { .ndo_set_mac_address = mac802154_wpan_mac_addr, }; +static void mac802154_wpan_free(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + mac802154_llsec_destroy(&priv->sec); + + free_netdev(dev); +} + void mac802154_wpan_setup(struct net_device *dev) { struct mac802154_sub_if_data *priv; @@ -301,14 +377,14 @@ void mac802154_wpan_setup(struct net_device *dev) dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN; dev->header_ops = &mac802154_header_ops; - dev->needed_tailroom = 2; /* FCS */ + dev->needed_tailroom = 2 + 16; /* FCS + MIC */ dev->mtu = IEEE802154_MTU; dev->tx_queue_len = 300; dev->type = ARPHRD_IEEE802154; dev->flags = IFF_NOARP | IFF_BROADCAST; dev->watchdog_timeo = 0; - dev->destructor = free_netdev; + dev->destructor = mac802154_wpan_free; dev->netdev_ops = &mac802154_wpan_ops; dev->ml_priv = &mac802154_mlme_wpan; @@ -319,6 +395,7 @@ void mac802154_wpan_setup(struct net_device *dev) priv->page = 0; spin_lock_init(&priv->mib_lock); + mutex_init(&priv->sec_mtx); get_random_bytes(&priv->bsn, 1); get_random_bytes(&priv->dsn, 1); @@ -331,6 +408,8 @@ void mac802154_wpan_setup(struct net_device *dev) priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + + mac802154_llsec_init(&priv->sec); } static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) @@ -339,9 +418,11 @@ static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) } static int -mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) +mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb, + const struct ieee802154_hdr *hdr) { __le16 span, sshort; + int rc; pr_debug("getting packet via slave interface %s\n", sdata->dev->name); @@ -388,15 +469,22 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) skb->dev = sdata->dev; + rc = mac802154_llsec_decrypt(&sdata->sec, skb); + if (rc) { + pr_debug("decryption failed: %i\n", rc); + kfree_skb(skb); + return NET_RX_DROP; + } + sdata->dev->stats.rx_packets++; sdata->dev->stats.rx_bytes += skb->len; - switch (mac_cb_type(skb)) { + switch (mac_cb(skb)->type) { case IEEE802154_FC_TYPE_DATA: return mac802154_process_data(sdata->dev, skb); default: pr_warn("ieee802154: bad frame received (type = %d)\n", - mac_cb_type(skb)); + mac_cb(skb)->type); kfree_skb(skb); return NET_RX_DROP; } @@ -419,62 +507,58 @@ static void mac802154_print_addr(const char *name, } } -static int mac802154_parse_frame_start(struct sk_buff *skb) +static int mac802154_parse_frame_start(struct sk_buff *skb, + struct ieee802154_hdr *hdr) { int hlen; - struct ieee802154_hdr hdr; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); - hlen = ieee802154_hdr_pull(skb, &hdr); + hlen = ieee802154_hdr_pull(skb, hdr); if (hlen < 0) return -EINVAL; skb->mac_len = hlen; - pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc), - hdr.seq); - - mac_cb(skb)->flags = hdr.fc.type; + pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr->fc), + hdr->seq); - if (hdr.fc.ack_request) - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; - if (hdr.fc.security_enabled) - mac_cb(skb)->flags |= MAC_CB_FLAG_SECEN; + cb->type = hdr->fc.type; + cb->ackreq = hdr->fc.ack_request; + cb->secen = hdr->fc.security_enabled; - mac802154_print_addr("destination", &hdr.dest); - mac802154_print_addr("source", &hdr.source); + mac802154_print_addr("destination", &hdr->dest); + mac802154_print_addr("source", &hdr->source); - mac_cb(skb)->source = hdr.source; - mac_cb(skb)->dest = hdr.dest; + cb->source = hdr->source; + cb->dest = hdr->dest; - if (hdr.fc.security_enabled) { + if (hdr->fc.security_enabled) { u64 key; - pr_debug("seclevel %i\n", hdr.sec.level); + pr_debug("seclevel %i\n", hdr->sec.level); - switch (hdr.sec.key_id_mode) { + switch (hdr->sec.key_id_mode) { case IEEE802154_SCF_KEY_IMPLICIT: pr_debug("implicit key\n"); break; case IEEE802154_SCF_KEY_INDEX: - pr_debug("key %02x\n", hdr.sec.key_id); + pr_debug("key %02x\n", hdr->sec.key_id); break; case IEEE802154_SCF_KEY_SHORT_INDEX: pr_debug("key %04x:%04x %02x\n", - le32_to_cpu(hdr.sec.short_src) >> 16, - le32_to_cpu(hdr.sec.short_src) & 0xffff, - hdr.sec.key_id); + le32_to_cpu(hdr->sec.short_src) >> 16, + le32_to_cpu(hdr->sec.short_src) & 0xffff, + hdr->sec.key_id); break; case IEEE802154_SCF_KEY_HW_INDEX: - key = swab64((__force u64) hdr.sec.extended_src); + key = swab64((__force u64) hdr->sec.extended_src); pr_debug("key source %8phC %02x\n", &key, - hdr.sec.key_id); + hdr->sec.key_id); break; } - - return -EINVAL; } return 0; @@ -483,10 +567,10 @@ static int mac802154_parse_frame_start(struct sk_buff *skb) void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) { int ret; - struct sk_buff *sskb; struct mac802154_sub_if_data *sdata; + struct ieee802154_hdr hdr; - ret = mac802154_parse_frame_start(skb); + ret = mac802154_parse_frame_start(skb, &hdr); if (ret) { pr_debug("got invalid frame\n"); return; @@ -494,12 +578,16 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(sdata, &priv->slaves, list) { - if (sdata->type != IEEE802154_DEV_WPAN) + if (sdata->type != IEEE802154_DEV_WPAN || + !netif_running(sdata->dev)) continue; - sskb = skb_clone(skb, GFP_ATOMIC); - if (sskb) - mac802154_subif_frame(sdata, sskb); + mac802154_subif_frame(sdata, skb, &hdr); + skb = NULL; + break; } rcu_read_unlock(); + + if (skb) + kfree_skb(skb); } diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 851cd880b0c0..6b38d083e1c9 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_MPLS))) goto out; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e9410d17619d..ad751fe2e82b 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -46,6 +46,9 @@ config NF_CONNTRACK To compile it as a module, choose M here. If unsure, say N. +config NF_LOG_COMMON + tristate + if NF_CONNTRACK config NF_CONNTRACK_MARK @@ -744,6 +747,7 @@ config NETFILTER_XT_TARGET_LED config NETFILTER_XT_TARGET_LOG tristate "LOG target support" + depends on NF_LOG_IPV4 && NF_LOG_IPV6 default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index bffdad774da7..8308624a406a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -47,6 +47,9 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o +# generic transport layer logging +obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o + obj-$(CONFIG_NF_NAT) += nf_nat.o # NAT protocols (nf_nat) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 117208321f16..ec8114fae50b 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -271,10 +271,7 @@ ip_set_free(void *members) { pr_debug("%p: free with %s\n", members, is_vmalloc_addr(members) ? "vfree" : "kfree"); - if (is_vmalloc_addr(members)) - vfree(members); - else - kfree(members); + kvfree(members); } EXPORT_SYMBOL_GPL(ip_set_free); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a8eb0a89326a..610e19c0e13f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -797,7 +797,6 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_control_del(cp); if (cp->flags & IP_VS_CONN_F_NFCT) { - ip_vs_conn_drop_conntrack(cp); /* Do not access conntracks during subsys cleanup * because nf_conntrack_find_get can not be used after * conntrack cleanup for the net. diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f26ee46b51f..e6836755c45d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -97,7 +97,7 @@ const char *ip_vs_proto_name(unsigned int proto) return "ICMPv6"; #endif default: - sprintf(buf, "IP_%d", proto); + sprintf(buf, "IP_%u", proto); return buf; } } @@ -1392,15 +1392,19 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (ipip) { __be32 info = ic->un.gateway; + __u8 type = ic->type; + __u8 code = ic->code; /* Update the MTU */ if (ic->type == ICMP_DEST_UNREACH && ic->code == ICMP_FRAG_NEEDED) { struct ip_vs_dest *dest = cp->dest; u32 mtu = ntohs(ic->un.frag.mtu); + __be16 frag_off = cih->frag_off; /* Strip outer IP and ICMP, go to IPIP header */ - __skb_pull(skb, ihl + sizeof(_icmph)); + if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL) + goto ignore_ipip; offset2 -= ihl + sizeof(_icmph); skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", @@ -1408,7 +1412,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); /* Client uses PMTUD? */ - if (!(cih->frag_off & htons(IP_DF))) + if (!(frag_off & htons(IP_DF))) goto ignore_ipip; /* Prefer the resulting PMTU */ if (dest) { @@ -1427,12 +1431,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) /* Strip outer IP, ICMP and IPIP, go to IP header of * original request. */ - __skb_pull(skb, offset2); + if (pskb_pull(skb, offset2) == NULL) + goto ignore_ipip; skb_reset_network_header(skb); IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, - ic->type, ic->code, ntohl(info)); - icmp_send(skb, ic->type, ic->code, info); + type, code, ntohl(info)); + icmp_send(skb, type, code, info); /* ICMP can be shorter but anyways, account it */ ip_vs_out_stats(cp, skb); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c42e83d2751c..8416307fdd1d 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1807,92 +1807,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec, }, #endif -#if 0 - { - .procname = "timeout_established", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_synsent", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_synrecv", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_finwait", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_timewait", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_close", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_closewait", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_lastack", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_listen", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_synack", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_udp", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "timeout_icmp", - .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, -#endif { } }; @@ -3778,6 +3692,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); + ip_vs_stop_estimator(net, &ipvs->tot_stats); } #else @@ -3840,7 +3755,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_trash_cleanup(net); - ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); remove_proc_entry("ip_vs_stats_percpu", net->proc_net); remove_proc_entry("ip_vs_stats", net->proc_net); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index db801263ee9f..eadffb29dec0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -886,8 +886,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); rcu_read_unlock(); if (!cp) { - if (param->pe_data) - kfree(param->pe_data); + kfree(param->pe_data); IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); return; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index c47444e4cf8c..6f70bdd3a90a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -562,7 +562,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(iph); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); rcu_read_unlock(); @@ -590,7 +590,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); rcu_read_unlock(); @@ -684,7 +684,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); rcu_read_unlock(); @@ -774,7 +774,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, MTU problem. */ /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); @@ -883,10 +883,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = old_iph->ttl; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) @@ -967,14 +967,14 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph->nexthdr = IPPROTO_IPV6; iph->payload_len = old_iph->payload_len; be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); - iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); + ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph)); iph->daddr = cp->daddr.in6; iph->saddr = saddr; iph->hop_limit = old_iph->hop_limit; /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) @@ -1023,7 +1023,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(ip_hdr(skb)); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); rcu_read_unlock(); @@ -1060,7 +1060,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); rcu_read_unlock(); @@ -1157,7 +1157,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_nat_icmp(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); rcu_read_unlock(); @@ -1249,7 +1249,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_nat_icmp_v6(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ - skb->local_df = 1; + skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 75421f2ba8be..de88c4ab5146 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -352,40 +352,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) local_bh_enable(); } -static void death_by_event(unsigned long ul_conntrack) -{ - struct nf_conn *ct = (void *)ul_conntrack; - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); - - BUG_ON(ecache == NULL); - - if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { - /* bad luck, let's retry again */ - ecache->timeout.expires = jiffies + - (prandom_u32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ecache->timeout); - return; - } - /* we've got the event delivered, now it's dying */ - set_bit(IPS_DYING_BIT, &ct->status); - nf_ct_put(ct); -} - -static void nf_ct_dying_timeout(struct nf_conn *ct) -{ - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); - - BUG_ON(ecache == NULL); - - /* set a new timer to retry event delivery */ - setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); - ecache->timeout.expires = jiffies + - (prandom_u32() % net->ct.sysctl_events_retry_timeout); - add_timer(&ecache->timeout); -} - bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; @@ -394,15 +360,20 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) if (tstamp && tstamp->stop == 0) tstamp->stop = ktime_to_ns(ktime_get_real()); - if (!nf_ct_is_dying(ct) && - unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct, - portid, report) < 0)) { + if (nf_ct_is_dying(ct)) + goto delete; + + if (nf_conntrack_event_report(IPCT_DESTROY, ct, + portid, report) < 0) { /* destroy event was not delivered */ nf_ct_delete_from_lists(ct); - nf_ct_dying_timeout(ct); + nf_conntrack_ecache_delayed_work(nf_ct_net(ct)); return false; } + + nf_conntrack_ecache_work(nf_ct_net(ct)); set_bit(IPS_DYING_BIT, &ct->status); + delete: nf_ct_delete_from_lists(ct); nf_ct_put(ct); return true; @@ -914,7 +885,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); nf_ct_ext_free(ct); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); - smp_mb__before_atomic_dec(); + smp_mb__before_atomic(); atomic_dec(&net->ct.count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1464,26 +1435,6 @@ void nf_conntrack_flush_report(struct net *net, u32 portid, int report) } EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); -static void nf_ct_release_dying_list(struct net *net) -{ - struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct; - struct hlist_nulls_node *n; - int cpu; - - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { - ct = nf_ct_tuplehash_to_ctrack(h); - /* never fails to remove them, no listeners at this point */ - nf_ct_kill(ct); - } - spin_unlock_bh(&pcpu->lock); - } -} - static int untrack_refs(void) { int cnt = 0, cpu; @@ -1548,7 +1499,6 @@ i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0); - nf_ct_release_dying_list(net); if (atomic_read(&net->ct.count) != 0) busy = 1; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 1df176146567..4e78c57b818f 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -29,6 +29,90 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); +#define ECACHE_RETRY_WAIT (HZ/10) + +enum retry_state { + STATE_CONGESTED, + STATE_RESTART, + STATE_DONE, +}; + +static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) +{ + struct nf_conn *refs[16]; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + unsigned int evicted = 0; + enum retry_state ret = STATE_DONE; + + spin_lock(&pcpu->lock); + + hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + if (nf_ct_is_dying(ct)) + continue; + + if (nf_conntrack_event(IPCT_DESTROY, ct)) { + ret = STATE_CONGESTED; + break; + } + + /* we've got the event delivered, now it's dying */ + set_bit(IPS_DYING_BIT, &ct->status); + refs[evicted] = ct; + + if (++evicted >= ARRAY_SIZE(refs)) { + ret = STATE_RESTART; + break; + } + } + + spin_unlock(&pcpu->lock); + + /* can't _put while holding lock */ + while (evicted) + nf_ct_put(refs[--evicted]); + + return ret; +} + +static void ecache_work(struct work_struct *work) +{ + struct netns_ct *ctnet = + container_of(work, struct netns_ct, ecache_dwork.work); + int cpu, delay = -1; + struct ct_pcpu *pcpu; + + local_bh_disable(); + + for_each_possible_cpu(cpu) { + enum retry_state ret; + + pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); + + ret = ecache_work_evict_list(pcpu); + + switch (ret) { + case STATE_CONGESTED: + delay = ECACHE_RETRY_WAIT; + goto out; + case STATE_RESTART: + delay = 0; + break; + case STATE_DONE: + break; + } + } + + out: + local_bh_enable(); + + ctnet->ecache_dwork_pending = delay > 0; + if (delay >= 0) + schedule_delayed_work(&ctnet->ecache_dwork, delay); +} + /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ void nf_ct_deliver_cached_events(struct nf_conn *ct) @@ -157,7 +241,6 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); #define NF_CT_EVENTS_DEFAULT 1 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; -static int nf_ct_events_retry_timeout __read_mostly = 15*HZ; #ifdef CONFIG_SYSCTL static struct ctl_table event_sysctl_table[] = { @@ -168,13 +251,6 @@ static struct ctl_table event_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "nf_conntrack_events_retry_timeout", - .data = &init_net.ct.sysctl_events_retry_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, {} }; #endif /* CONFIG_SYSCTL */ @@ -196,7 +272,6 @@ static int nf_conntrack_event_init_sysctl(struct net *net) goto out; table[0].data = &net->ct.sysctl_events; - table[1].data = &net->ct.sysctl_events_retry_timeout; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -238,12 +313,13 @@ static void nf_conntrack_event_fini_sysctl(struct net *net) int nf_conntrack_ecache_pernet_init(struct net *net) { net->ct.sysctl_events = nf_ct_events; - net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; + INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); return nf_conntrack_event_init_sysctl(net); } void nf_conntrack_ecache_pernet_fini(struct net *net) { + cancel_delayed_work_sync(&net->ct.ecache_dwork); nf_conntrack_event_fini_sysctl(net); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ccc46fa5edbc..355a5c4ef763 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -597,6 +597,9 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) #ifdef CONFIG_NF_CONNTRACK_MARK + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ +#endif + ctnetlink_proto_size(ct) + ctnetlink_label_size(ct) ; @@ -742,8 +745,7 @@ static int ctnetlink_done(struct netlink_callback *cb) { if (cb->args[1]) nf_ct_put((struct nf_conn *)cb->args[1]); - if (cb->data) - kfree(cb->data); + kfree(cb->data); return 0; } @@ -1150,7 +1152,7 @@ static int ctnetlink_done_list(struct netlink_callback *cb) static int ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) { - struct nf_conn *ct, *last = NULL; + struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); @@ -1163,8 +1165,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying if (cb->args[2]) return 0; - if (cb->args[0] == nr_cpu_ids) - return 0; + last = (struct nf_conn *)cb->args[1]; for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { struct ct_pcpu *pcpu; @@ -1174,7 +1175,6 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); spin_lock_bh(&pcpu->lock); - last = (struct nf_conn *)cb->args[1]; list = dying ? &pcpu->dying : &pcpu->unconfirmed; restart: hlist_nulls_for_each_entry(h, n, list, hnnode) { @@ -1193,7 +1193,9 @@ restart: ct); rcu_read_unlock(); if (res < 0) { - nf_conntrack_get(&ct->ct_general); + if (!atomic_inc_not_zero(&ct->ct_general.use)) + continue; + cb->args[0] = cpu; cb->args[1] = (unsigned long)ct; spin_unlock_bh(&pcpu->lock); goto out; @@ -1202,10 +1204,10 @@ restart: if (cb->args[1]) { cb->args[1] = 0; goto restart; - } else - cb->args[2] = 1; + } spin_unlock_bh(&pcpu->lock); } + cb->args[2] = 1; out: if (last) nf_ct_put(last); @@ -1336,6 +1338,9 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) #ifdef CONFIG_NF_NAT_NEEDED int ret; + if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) + return 0; + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST, cda[CTA_NAT_DST]); if (ret < 0) @@ -2037,6 +2042,9 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) #ifdef CONFIG_NF_CONNTRACK_MARK + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ +#endif + ctnetlink_proto_size(ct) ; } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 85296d4eac0e..daad6022c689 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,16 +16,22 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; +static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); static struct nf_logger *__find_logger(int pf, const char *str_logger) { - struct nf_logger *t; + struct nf_logger *log; + int i; + + for (i = 0; i < NF_LOG_TYPE_MAX; i++) { + if (loggers[pf][i] == NULL) + continue; - list_for_each_entry(t, &nf_loggers_l[pf], list[pf]) { - if (!strnicmp(str_logger, t->name, strlen(t->name))) - return t; + log = rcu_dereference_protected(loggers[pf][i], + lockdep_is_held(&nf_log_mutex)); + if (!strnicmp(str_logger, log->name, strlen(log->name))) + return log; } return NULL; @@ -73,17 +79,14 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(logger->list); i++) - INIT_LIST_HEAD(&logger->list[i]); - mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) - list_add_tail(&(logger->list[i]), &(nf_loggers_l[i])); + rcu_assign_pointer(loggers[i][logger->type], logger); } else { /* register at end of list to honor first register win */ - list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); + rcu_assign_pointer(loggers[pf][logger->type], logger); } mutex_unlock(&nf_log_mutex); @@ -98,7 +101,7 @@ void nf_log_unregister(struct nf_logger *logger) mutex_lock(&nf_log_mutex); for (i = 0; i < NFPROTO_NUMPROTO; i++) - list_del(&logger->list[i]); + RCU_INIT_POINTER(loggers[i][logger->type], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unregister); @@ -129,6 +132,48 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf) } EXPORT_SYMBOL(nf_log_unbind_pf); +void nf_logger_request_module(int pf, enum nf_log_type type) +{ + if (loggers[pf][type] == NULL) + request_module("nf-logger-%u-%u", pf, type); +} +EXPORT_SYMBOL_GPL(nf_logger_request_module); + +int nf_logger_find_get(int pf, enum nf_log_type type) +{ + struct nf_logger *logger; + int ret = -ENOENT; + + logger = loggers[pf][type]; + if (logger == NULL) + request_module("nf-logger-%u-%u", pf, type); + + rcu_read_lock(); + logger = rcu_dereference(loggers[pf][type]); + if (logger == NULL) + goto out; + + if (logger && try_module_get(logger->me)) + ret = 0; +out: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_logger_find_get); + +void nf_logger_put(int pf, enum nf_log_type type) +{ + struct nf_logger *logger; + + BUG_ON(loggers[pf][type] == NULL); + + rcu_read_lock(); + logger = rcu_dereference(loggers[pf][type]); + module_put(logger->me); + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nf_logger_put); + void nf_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, @@ -143,7 +188,11 @@ void nf_log_packet(struct net *net, const struct nf_logger *logger; rcu_read_lock(); - logger = rcu_dereference(net->nf.nf_loggers[pf]); + if (loginfo != NULL) + logger = rcu_dereference(loggers[pf][loginfo->type]); + else + logger = rcu_dereference(net->nf.nf_loggers[pf]); + if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); @@ -154,6 +203,63 @@ void nf_log_packet(struct net *net, } EXPORT_SYMBOL(nf_log_packet); +#define S_SIZE (1024 - (sizeof(unsigned int) + 1)) + +struct nf_log_buf { + unsigned int count; + char buf[S_SIZE + 1]; +}; +static struct nf_log_buf emergency, *emergency_ptr = &emergency; + +__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...) +{ + va_list args; + int len; + + if (likely(m->count < S_SIZE)) { + va_start(args, f); + len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args); + va_end(args); + if (likely(m->count + len < S_SIZE)) { + m->count += len; + return 0; + } + } + m->count = S_SIZE; + printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n"); + return -1; +} +EXPORT_SYMBOL_GPL(nf_log_buf_add); + +struct nf_log_buf *nf_log_buf_open(void) +{ + struct nf_log_buf *m = kmalloc(sizeof(*m), GFP_ATOMIC); + + if (unlikely(!m)) { + local_bh_disable(); + do { + m = xchg(&emergency_ptr, NULL); + } while (!m); + } + m->count = 0; + return m; +} +EXPORT_SYMBOL_GPL(nf_log_buf_open); + +void nf_log_buf_close(struct nf_log_buf *m) +{ + m->buf[m->count] = 0; + printk("%s\n", m->buf); + + if (likely(m != &emergency)) + kfree(m); + else { + emergency_ptr = m; + local_bh_enable(); + } +} +EXPORT_SYMBOL_GPL(nf_log_buf_close); + #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { @@ -188,8 +294,7 @@ static int seq_show(struct seq_file *s, void *v) { loff_t *pos = v; const struct nf_logger *logger; - struct nf_logger *t; - int ret; + int i, ret; struct net *net = seq_file_net(s); logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], @@ -203,11 +308,16 @@ static int seq_show(struct seq_file *s, void *v) if (ret < 0) return ret; - list_for_each_entry(t, &nf_loggers_l[*pos], list[*pos]) { - ret = seq_printf(s, "%s", t->name); + for (i = 0; i < NF_LOG_TYPE_MAX; i++) { + if (loggers[*pos][i] == NULL) + continue; + + logger = rcu_dereference_protected(loggers[*pos][i], + lockdep_is_held(&nf_log_mutex)); + ret = seq_printf(s, "%s", logger->name); if (ret < 0) return ret; - if (&t->list[*pos] != nf_loggers_l[*pos].prev) { + if (i == 0 && loggers[*pos][i + 1] != NULL) { ret = seq_printf(s, ","); if (ret < 0) return ret; @@ -389,14 +499,5 @@ static struct pernet_operations nf_log_net_ops = { int __init netfilter_log_init(void) { - int i, ret; - - ret = register_pernet_subsys(&nf_log_net_ops); - if (ret < 0) - return ret; - - for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) - INIT_LIST_HEAD(&(nf_loggers_l[i])); - - return 0; + return register_pernet_subsys(&nf_log_net_ops); } diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c new file mode 100644 index 000000000000..eeb8ef4ff1a3 --- /dev/null +++ b/net/netfilter/nf_log_common.c @@ -0,0 +1,187 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/ip.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <net/route.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/xt_LOG.h> +#include <net/netfilter/nf_log.h> + +int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb, + u8 proto, int fragment, unsigned int offset) +{ + struct udphdr _udph; + const struct udphdr *uh; + + if (proto == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + nf_log_buf_add(m, "PROTO=UDP "); + else /* Max length: 14 "PROTO=UDPLITE " */ + nf_log_buf_add(m, "PROTO=UDPLITE "); + + if (fragment) + goto out; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ", + ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len)); + +out: + return 0; +} +EXPORT_SYMBOL_GPL(nf_log_dump_udp_header); + +int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, + u8 proto, int fragment, unsigned int offset, + unsigned int logflags) +{ + struct tcphdr _tcph; + const struct tcphdr *th; + + /* Max length: 10 "PROTO=TCP " */ + nf_log_buf_add(m, "PROTO=TCP "); + + if (fragment) + return 0; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) { + nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + nf_log_buf_add(m, "SPT=%u DPT=%u ", + ntohs(th->source), ntohs(th->dest)); + /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ + if (logflags & XT_LOG_TCPSEQ) { + nf_log_buf_add(m, "SEQ=%u ACK=%u ", + ntohl(th->seq), ntohl(th->ack_seq)); + } + + /* Max length: 13 "WINDOW=65535 " */ + nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window)); + /* Max length: 9 "RES=0x3C " */ + nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & + TCP_RESERVED_BITS) >> 22)); + /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ + if (th->cwr) + nf_log_buf_add(m, "CWR "); + if (th->ece) + nf_log_buf_add(m, "ECE "); + if (th->urg) + nf_log_buf_add(m, "URG "); + if (th->ack) + nf_log_buf_add(m, "ACK "); + if (th->psh) + nf_log_buf_add(m, "PSH "); + if (th->rst) + nf_log_buf_add(m, "RST "); + if (th->syn) + nf_log_buf_add(m, "SYN "); + if (th->fin) + nf_log_buf_add(m, "FIN "); + /* Max length: 11 "URGP=65535 " */ + nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); + + if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { + u_int8_t _opt[60 - sizeof(struct tcphdr)]; + const u_int8_t *op; + unsigned int i; + unsigned int optsize = th->doff*4 - sizeof(struct tcphdr); + + op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), + optsize, _opt); + if (op == NULL) { + nf_log_buf_add(m, "OPT (TRUNCATED)"); + return 1; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + nf_log_buf_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + nf_log_buf_add(m, "%02X", op[i]); + + nf_log_buf_add(m, ") "); + } + + return 0; +} +EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header); + +void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk) +{ + if (!sk || sk->sk_state == TCP_TIME_WAIT) + return; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + const struct cred *cred = sk->sk_socket->file->f_cred; + nf_log_buf_add(m, "UID=%u GID=%u ", + from_kuid_munged(&init_user_ns, cred->fsuid), + from_kgid_munged(&init_user_ns, cred->fsgid)); + } + read_unlock_bh(&sk->sk_callback_lock); +} +EXPORT_SYMBOL_GPL(nf_log_dump_sk_uid_gid); + +void +nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *prefix) +{ + nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", + '0' + loginfo->u.log.level, prefix, + in ? in->name : "", + out ? out->name : ""); +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) { + const struct net_device *physindev; + const struct net_device *physoutdev; + + physindev = skb->nf_bridge->physindev; + if (physindev && in != physindev) + nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); + physoutdev = skb->nf_bridge->physoutdev; + if (physoutdev && out != physoutdev) + nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); + } +#endif +} +EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); + +static int __init nf_log_common_init(void) +{ + return 0; +} + +static void __exit nf_log_common_exit(void) {} + +module_init(nf_log_common_init); +module_exit(nf_log_common_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 52ca952b802c..552f97cd9fde 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -358,6 +358,19 @@ out: rcu_read_unlock(); } +struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + if (nat) + return nat; + + if (!nf_ct_is_confirmed(ct)) + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + + return nat; +} +EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add); + unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, @@ -368,14 +381,9 @@ nf_nat_setup_info(struct nf_conn *ct, struct nf_conn_nat *nat; /* nat helper or nfctnetlink also setup binding */ - nat = nfct_nat(ct); - if (!nat) { - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); @@ -517,6 +525,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) return i->status & IPS_NAT_MASK ? 1 : 0; } +static int nf_nat_proto_clean(struct nf_conn *ct, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + + if (nf_nat_proto_remove(ct, data)) + return 1; + + if (!nat || !nat->ct) + return 0; + + /* This netns is being destroyed, and conntrack has nat null binding. + * Remove it from bysource hash, as the table will be freed soon. + * + * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() + * will delete entry from already-freed table. + */ + if (!del_timer(&ct->timeout)) + return 1; + + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); + ct->status &= ~IPS_NAT_DONE_MASK; + nat->ct = NULL; + spin_unlock_bh(&nf_nat_lock); + + add_timer(&ct->timeout); + + /* don't delete conntrack. Although that would make things a lot + * simpler, we'd end up flushing all conntracks on nat rmmod. + */ + return 0; +} + static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) { struct nf_nat_proto_clean clean = { @@ -669,7 +710,7 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -787,7 +828,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0); + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index 83a72a235cae..fbce552a796e 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -95,7 +95,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, } EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range) { diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index c8be2cdac0bf..b8067b53ff3a 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -78,7 +78,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { .manip_pkt = dccp_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = dccp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 754536f2c674..cbc7ade1487b 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -59,7 +59,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { .manip_pkt = sctp_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = sctp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c index 83ec8a6e4c36..37f5505f4529 100644 --- a/net/netfilter/nf_nat_proto_tcp.c +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -79,7 +79,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_tcp = { .manip_pkt = tcp_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = tcp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index 7df613fb34a2..b0ede2f0d8bc 100644 --- a/net/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -70,7 +70,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_udp = { .manip_pkt = udp_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = udp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 776a0d1317b1..368f14e01e75 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -69,7 +69,7 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { .manip_pkt = udplite_manip_pkt, .in_range = nf_nat_l4proto_in_range, .unique_tuple = udplite_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3fd159db9f06..b8035c2d6667 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -35,7 +35,7 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi) { INIT_LIST_HEAD(&afi->tables); nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&afi->list, &net->nft.af_info); + list_add_tail_rcu(&afi->list, &net->nft.af_info); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo); void nft_unregister_afinfo(struct nft_af_info *afi) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&afi->list); + list_del_rcu(&afi->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_afinfo); @@ -88,6 +88,45 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) return ERR_PTR(-EAFNOSUPPORT); } +static void nft_ctx_init(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nft_af_info *afi, + struct nft_table *table, + struct nft_chain *chain, + const struct nlattr * const *nla) +{ + ctx->net = sock_net(skb->sk); + ctx->afi = afi; + ctx->table = table; + ctx->chain = chain; + ctx->nla = nla; + ctx->portid = NETLINK_CB(skb).portid; + ctx->report = nlmsg_report(nlh); + ctx->seq = nlh->nlmsg_seq; +} + +static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, + u32 size) +{ + struct nft_trans *trans; + + trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL); + if (trans == NULL) + return NULL; + + trans->msg_type = msg_type; + trans->ctx = *ctx; + + return trans; +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + list_del(&trans->list); + kfree(trans); +} + /* * Tables */ @@ -197,20 +236,13 @@ nla_put_failure: return -1; } -static int nf_tables_table_notify(const struct sk_buff *oskb, - const struct nlmsghdr *nlh, - const struct nft_table *table, - int event, int family) +static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - u32 seq = nlh ? nlh->nlmsg_seq : 0; - struct net *net = oskb ? sock_net(oskb->sk) : &init_net; - bool report; int err; - report = nlh ? nlmsg_report(nlh) : false; - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -218,18 +250,20 @@ static int nf_tables_table_notify(const struct sk_buff *oskb, if (skb == NULL) goto err; - err = nf_tables_fill_table_info(skb, portid, seq, event, 0, - family, table); + err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0, + ctx->afi->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -243,11 +277,14 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -260,15 +297,21 @@ static int nf_tables_dump_tables(struct sk_buff *skb, NLM_F_MULTI, afi->family, table) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } } done: + rcu_read_unlock(); cb->args[0] = idx; return skb->len; } +/* Internal table flags */ +#define NFT_TABLE_INACTIVE (1 << 15) + static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -295,6 +338,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -343,7 +388,7 @@ err: return err; } -static int nf_tables_table_disable(const struct nft_af_info *afi, +static void nf_tables_table_disable(const struct nft_af_info *afi, struct nft_table *table) { struct nft_chain *chain; @@ -353,45 +398,66 @@ static int nf_tables_table_disable(const struct nft_af_info *afi, nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); } - - return 0; } -static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct nft_af_info *afi, struct nft_table *table) +static int nf_tables_updtable(struct nft_ctx *ctx) { - const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - int family = nfmsg->nfgen_family, ret = 0; + struct nft_trans *trans; + u32 flags; + int ret = 0; - if (nla[NFTA_TABLE_FLAGS]) { - u32 flags; + if (!ctx->nla[NFTA_TABLE_FLAGS]) + return 0; - flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); - if (flags & ~NFT_TABLE_F_DORMANT) - return -EINVAL; + flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; - if ((flags & NFT_TABLE_F_DORMANT) && - !(table->flags & NFT_TABLE_F_DORMANT)) { - ret = nf_tables_table_disable(afi, table); - if (ret >= 0) - table->flags |= NFT_TABLE_F_DORMANT; - } else if (!(flags & NFT_TABLE_F_DORMANT) && - table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(afi, table); - if (ret >= 0) - table->flags &= ~NFT_TABLE_F_DORMANT; + if (flags == ctx->table->flags) + return 0; + + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, + sizeof(struct nft_trans_table)); + if (trans == NULL) + return -ENOMEM; + + if ((flags & NFT_TABLE_F_DORMANT) && + !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { + nft_trans_table_enable(trans) = false; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + ctx->table->flags & NFT_TABLE_F_DORMANT) { + ret = nf_tables_table_enable(ctx->afi, ctx->table); + if (ret >= 0) { + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + nft_trans_table_enable(trans) = true; } - if (ret < 0) - goto err; } + if (ret < 0) + goto err; - nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); + nft_trans_table_update(trans) = true; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; err: + nft_trans_destroy(trans); return ret; } +static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWTABLE) + ctx->table->flags |= NFT_TABLE_INACTIVE; + + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + return 0; +} + static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -403,6 +469,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; u32 flags = 0; + struct nft_ctx ctx; + int err; afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) @@ -417,11 +485,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, } if (table != NULL) { + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); + + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + return nf_tables_updtable(&ctx); } if (nla[NFTA_TABLE_FLAGS]) { @@ -444,8 +516,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->sets); table->flags = flags; - list_add_tail(&table->list, &afi->tables); - nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); + if (err < 0) { + kfree(table); + module_put(afi->owner); + return err; + } + list_add_tail_rcu(&table->list, &afi->tables); return 0; } @@ -457,7 +535,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, struct nft_af_info *afi; struct nft_table *table; struct net *net = sock_net(skb->sk); - int family = nfmsg->nfgen_family; + int family = nfmsg->nfgen_family, err; + struct nft_ctx ctx; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -466,17 +545,28 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); - - if (!list_empty(&table->chains) || !list_empty(&table->sets)) + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; + if (table->use > 0) return -EBUSY; - list_del(&table->list); - nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); - kfree(table); - module_put(afi->owner); + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE); + if (err < 0) + return err; + + list_del_rcu(&table->list); return 0; } +static void nf_tables_table_destroy(struct nft_ctx *ctx) +{ + BUG_ON(ctx->table->use > 0); + + kfree(ctx->table); + module_put(ctx->afi->owner); +} + int nft_register_chain_type(const struct nf_chain_type *ctype) { int err = 0; @@ -541,7 +631,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, - [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, + [NFTA_CHAIN_TYPE] = { .type = NLA_STRING }, [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, }; @@ -554,13 +644,20 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) { struct nft_stats *cpu_stats, total; struct nlattr *nest; + unsigned int seq; + u64 pkts, bytes; int cpu; memset(&total, 0, sizeof(total)); for_each_possible_cpu(cpu) { cpu_stats = per_cpu_ptr(stats, cpu); - total.pkts += cpu_stats->pkts; - total.bytes += cpu_stats->bytes; + do { + seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + pkts = cpu_stats->pkts; + bytes = cpu_stats->bytes; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + total.pkts += pkts; + total.bytes += bytes; } nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); if (nest == NULL) @@ -637,21 +734,13 @@ nla_put_failure: return -1; } -static int nf_tables_chain_notify(const struct sk_buff *oskb, - const struct nlmsghdr *nlh, - const struct nft_table *table, - const struct nft_chain *chain, - int event, int family) +static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - struct net *net = oskb ? sock_net(oskb->sk) : &init_net; - u32 seq = nlh ? nlh->nlmsg_seq : 0; - bool report; int err; - report = nlh ? nlmsg_report(nlh) : false; - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -659,18 +748,21 @@ static int nf_tables_chain_notify(const struct sk_buff *oskb, if (skb == NULL) goto err; - err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family, - table, chain); + err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0, + ctx->afi->family, ctx->table, + ctx->chain); if (err < 0) { kfree_skb(skb); goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -685,12 +777,15 @@ static int nf_tables_dump_chains(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(chain, &table->chains, list) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -702,17 +797,19 @@ static int nf_tables_dump_chains(struct sk_buff *skb, NLM_F_MULTI, afi->family, table, chain) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } } } done: + rcu_read_unlock(); cb->args[0] = idx; return skb->len; } - static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -740,10 +837,14 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) return PTR_ERR(chain); + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -767,8 +868,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; -static int -nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) +static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) { struct nlattr *tb[NFTA_COUNTER_MAX+1]; struct nft_stats __percpu *newstats; @@ -777,14 +877,14 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); if (err < 0) - return err; + return ERR_PTR(err); if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) - return -EINVAL; + return ERR_PTR(-EINVAL); - newstats = alloc_percpu(struct nft_stats); + newstats = netdev_alloc_pcpu_stats(struct nft_stats); if (newstats == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); /* Restore old counters on this cpu, no problem. Per-cpu statistics * are not exposed to userspace. @@ -793,6 +893,12 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + return newstats; +} + +static void nft_chain_stats_replace(struct nft_base_chain *chain, + struct nft_stats __percpu *newstats) +{ if (chain->stats) { struct nft_stats __percpu *oldstats = nft_dereference(chain->stats); @@ -802,17 +908,43 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) free_percpu(oldstats); } else rcu_assign_pointer(chain->stats, newstats); +} + +static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (trans == NULL) + return -ENOMEM; + if (msg_type == NFT_MSG_NEWCHAIN) + ctx->chain->flags |= NFT_CHAIN_INACTIVE; + + list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; } +static void nf_tables_chain_destroy(struct nft_chain *chain) +{ + BUG_ON(chain->use > 0); + + if (chain->flags & NFT_BASE_CHAIN) { + module_put(nft_base_chain(chain)->type->owner); + free_percpu(nft_base_chain(chain)->stats); + kfree(nft_base_chain(chain)); + } else { + kfree(chain); + } +} + static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nlattr * uninitialized_var(name); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; struct nft_base_chain *basechain = NULL; @@ -822,8 +954,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, u8 policy = NF_ACCEPT; u64 handle = 0; unsigned int i; + struct nft_stats __percpu *stats; int err; bool create; + struct nft_ctx ctx; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -869,6 +1003,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, } if (chain != NULL) { + struct nft_stats *stats = NULL; + struct nft_trans *trans; + + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -882,19 +1021,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (!(chain->flags & NFT_BASE_CHAIN)) return -EOPNOTSUPP; - err = nf_tables_counters(nft_base_chain(chain), - nla[NFTA_CHAIN_COUNTERS]); - if (err < 0) - return err; + stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); + if (IS_ERR(stats)) + return PTR_ERR(stats); } - if (nla[NFTA_CHAIN_POLICY]) - nft_base_chain(chain)->policy = policy; + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN, + sizeof(struct nft_trans_chain)); + if (trans == NULL) + return -ENOMEM; + + nft_trans_chain_stats(trans) = stats; + nft_trans_chain_update(trans) = true; - if (nla[NFTA_CHAIN_HANDLE] && name) - nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + if (nla[NFTA_CHAIN_POLICY]) + nft_trans_chain_policy(trans) = policy; + else + nft_trans_chain_policy(trans) = -1; - goto notify; + if (nla[NFTA_CHAIN_HANDLE] && name) { + nla_strlcpy(nft_trans_chain_name(trans), name, + NFT_CHAIN_MAXNAMELEN); + } + list_add_tail(&trans->list, &net->nft.commit_list); + return 0; } if (table->use == UINT_MAX) @@ -939,23 +1090,21 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return -ENOMEM; if (nla[NFTA_CHAIN_COUNTERS]) { - err = nf_tables_counters(basechain, - nla[NFTA_CHAIN_COUNTERS]); - if (err < 0) { + stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); + if (IS_ERR(stats)) { module_put(type->owner); kfree(basechain); - return err; + return PTR_ERR(stats); } + basechain->stats = stats; } else { - struct nft_stats __percpu *newstats; - - newstats = alloc_percpu(struct nft_stats); - if (newstats == NULL) { + stats = netdev_alloc_pcpu_stats(struct nft_stats); + if (IS_ERR(stats)) { module_put(type->owner); kfree(basechain); - return -ENOMEM; + return PTR_ERR(stats); } - rcu_assign_pointer(basechain->stats, newstats); + rcu_assign_pointer(basechain->stats, stats); } basechain->type = type; @@ -992,31 +1141,27 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (!(table->flags & NFT_TABLE_F_DORMANT) && chain->flags & NFT_BASE_CHAIN) { err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); - if (err < 0) { - module_put(basechain->type->owner); - free_percpu(basechain->stats); - kfree(basechain); - return err; - } + if (err < 0) + goto err1; } - list_add_tail(&chain->list, &table->chains); - table->use++; -notify: - nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, - family); - return 0; -} -static void nf_tables_chain_destroy(struct nft_chain *chain) -{ - BUG_ON(chain->use > 0); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN); + if (err < 0) + goto err2; - if (chain->flags & NFT_BASE_CHAIN) { - module_put(nft_base_chain(chain)->type->owner); - free_percpu(nft_base_chain(chain)->stats); - kfree(nft_base_chain(chain)); - } else - kfree(chain); + table->use++; + list_add_tail_rcu(&chain->list, &table->chains); + return 0; +err2: + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(chain)->ops, + afi->nops); + } +err1: + nf_tables_chain_destroy(chain); + return err; } static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, @@ -1024,11 +1169,13 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nft_ctx ctx; + int err; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1037,48 +1184,27 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) return PTR_ERR(chain); - - if (!list_empty(&chain->rules) || chain->use > 0) + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; + if (chain->use > 0) return -EBUSY; - list_del(&chain->list); - table->use--; - - if (!(table->flags & NFT_TABLE_F_DORMANT) && - chain->flags & NFT_BASE_CHAIN) - nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); - - nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, - family); - - /* Make sure all rule references are gone before this is released */ - synchronize_rcu(); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN); + if (err < 0) + return err; - nf_tables_chain_destroy(chain); + table->use--; + list_del_rcu(&chain->list); return 0; } -static void nft_ctx_init(struct nft_ctx *ctx, - const struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nft_af_info *afi, - const struct nft_table *table, - const struct nft_chain *chain, - const struct nlattr * const *nla) -{ - ctx->net = sock_net(skb->sk); - ctx->skb = skb; - ctx->nlh = nlh; - ctx->afi = afi; - ctx->table = table; - ctx->chain = chain; - ctx->nla = nla; -} - /* * Expressions */ @@ -1093,7 +1219,10 @@ static void nft_ctx_init(struct nft_ctx *ctx, int nft_register_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&type->list, &nf_tables_expressions); + if (type->family == NFPROTO_UNSPEC) + list_add_tail_rcu(&type->list, &nf_tables_expressions); + else + list_add_rcu(&type->list, &nf_tables_expressions); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -1108,7 +1237,7 @@ EXPORT_SYMBOL_GPL(nft_register_expr); void nft_unregister_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&type->list); + list_del_rcu(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_expr); @@ -1361,22 +1490,15 @@ nla_put_failure: return -1; } -static int nf_tables_rule_notify(const struct sk_buff *oskb, - const struct nlmsghdr *nlh, - const struct nft_table *table, - const struct nft_chain *chain, +static int nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, - int event, u32 flags, int family) + int event) { struct sk_buff *skb; - u32 portid = NETLINK_CB(oskb).portid; - struct net *net = oskb ? sock_net(oskb->sk) : &init_net; - u32 seq = nlh->nlmsg_seq; - bool report; int err; - report = nlmsg_report(nlh); - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -1384,18 +1506,21 @@ static int nf_tables_rule_notify(const struct sk_buff *oskb, if (skb == NULL) goto err; - err = nf_tables_fill_rule_info(skb, portid, seq, event, flags, - family, table, chain, rule); + err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0, + ctx->afi->family, ctx->table, + ctx->chain, rule); if (err < 0) { kfree_skb(skb); goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } return err; } @@ -1445,16 +1570,17 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - u8 genctr = ACCESS_ONCE(net->nft.genctr); - u8 gencursor = ACCESS_ONCE(net->nft.gencursor); - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { - list_for_each_entry(rule, &chain->rules, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(chain, &table->chains, list) { + list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_rule_is_active(net, rule)) goto cont; if (idx < s_idx) @@ -1468,6 +1594,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, NLM_F_MULTI | NLM_F_APPEND, afi->family, table, chain, rule) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -1475,9 +1603,7 @@ cont: } } done: - /* Invalidate this dump, a transition to the new generation happened */ - if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) - return -EBUSY; + rcu_read_unlock(); cb->args[0] = idx; return skb->len; @@ -1511,10 +1637,14 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); if (IS_ERR(chain)) return PTR_ERR(chain); + if (chain->flags & NFT_CHAIN_INACTIVE) + return -ENOENT; rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) @@ -1554,37 +1684,36 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -#define NFT_RULE_MAXEXPRS 128 - -static struct nft_expr_info *info; - -static struct nft_rule_trans * -nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule) +static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, + struct nft_rule *rule) { - struct nft_rule_trans *rupd; + struct nft_trans *trans; - rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); - if (rupd == NULL) - return NULL; + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule)); + if (trans == NULL) + return NULL; - rupd->ctx = *ctx; - rupd->rule = rule; - list_add_tail(&rupd->list, &ctx->net->nft.commit_list); + nft_trans_rule(trans) = rule; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); - return rupd; + return trans; } +#define NFT_RULE_MAXEXPRS 128 + +static struct nft_expr_info *info; + static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; - struct nft_rule_trans *repl = NULL; + struct nft_trans *trans = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; @@ -1623,6 +1752,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (!create || nlh->nlmsg_flags & NLM_F_REPLACE) return -EINVAL; handle = nf_tables_alloc_handle(table); + + if (chain->use == UINT_MAX) + return -EOVERFLOW; } if (nla[NFTA_RULE_POSITION]) { @@ -1682,13 +1814,15 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_REPLACE) { if (nft_rule_is_active_next(net, old_rule)) { - repl = nf_tables_trans_add(&ctx, old_rule); - if (repl == NULL) { + trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, + old_rule); + if (trans == NULL) { err = -ENOMEM; goto err2; } nft_rule_disactivate_next(net, old_rule); - list_add_tail(&rule->list, &old_rule->list); + chain->use--; + list_add_tail_rcu(&rule->list, &old_rule->list); } else { err = -ENOENT; goto err2; @@ -1705,19 +1839,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - if (nf_tables_trans_add(&ctx, rule) == NULL) { + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { err = -ENOMEM; goto err3; } + chain->use++; return 0; err3: list_del_rcu(&rule->list); - if (repl) { - list_del_rcu(&repl->rule->list); - list_del(&repl->list); - nft_rule_clear(net, repl->rule); - kfree(repl); + if (trans) { + list_del_rcu(&nft_trans_rule(trans)->list); + nft_rule_clear(net, nft_trans_rule(trans)); + nft_trans_destroy(trans); + chain->use++; } err2: nf_tables_rule_destroy(&ctx, rule); @@ -1734,9 +1869,10 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ if (nft_rule_is_active_next(ctx->net, rule)) { - if (nf_tables_trans_add(ctx, rule) == NULL) + if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL) return -ENOMEM; nft_rule_disactivate_next(ctx->net, rule); + ctx->chain->use--; return 0; } return -ENOENT; @@ -1760,9 +1896,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); - const struct nft_table *table; + struct nft_table *table; struct nft_chain *chain = NULL; struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; @@ -1775,6 +1911,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; if (nla[NFTA_RULE_CHAIN]) { chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); @@ -1807,88 +1945,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, return err; } -static int nf_tables_commit(struct sk_buff *skb) -{ - struct net *net = sock_net(skb->sk); - struct nft_rule_trans *rupd, *tmp; - - /* Bump generation counter, invalidate any dump in progress */ - net->nft.genctr++; - - /* A new generation has just started */ - net->nft.gencursor = gencursor_next(net); - - /* Make sure all packets have left the previous generation before - * purging old rules. - */ - synchronize_rcu(); - - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - /* This rule was inactive in the past and just became active. - * Clear the next bit of the genmask since its meaning has - * changed, now it is the future. - */ - if (nft_rule_is_active(net, rupd->rule)) { - nft_rule_clear(net, rupd->rule); - nf_tables_rule_notify(skb, rupd->ctx.nlh, - rupd->ctx.table, rupd->ctx.chain, - rupd->rule, NFT_MSG_NEWRULE, 0, - rupd->ctx.afi->family); - list_del(&rupd->list); - kfree(rupd); - continue; - } - - /* This rule is in the past, get rid of it */ - list_del_rcu(&rupd->rule->list); - nf_tables_rule_notify(skb, rupd->ctx.nlh, - rupd->ctx.table, rupd->ctx.chain, - rupd->rule, NFT_MSG_DELRULE, 0, - rupd->ctx.afi->family); - } - - /* Make sure we don't see any packet traversing old rules */ - synchronize_rcu(); - - /* Now we can safely release unused old rules */ - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - nf_tables_rule_destroy(&rupd->ctx, rupd->rule); - list_del(&rupd->list); - kfree(rupd); - } - - return 0; -} - -static int nf_tables_abort(struct sk_buff *skb) -{ - struct net *net = sock_net(skb->sk); - struct nft_rule_trans *rupd, *tmp; - - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - if (!nft_rule_is_active_next(net, rupd->rule)) { - nft_rule_clear(net, rupd->rule); - list_del(&rupd->list); - kfree(rupd); - continue; - } - - /* This rule is inactive, get rid of it */ - list_del_rcu(&rupd->rule->list); - } - - /* Make sure we don't see any packet accessing aborted rules */ - synchronize_rcu(); - - list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { - nf_tables_rule_destroy(&rupd->ctx, rupd->rule); - list_del(&rupd->list); - kfree(rupd); - } - - return 0; -} - /* * Sets */ @@ -1898,7 +1954,7 @@ static LIST_HEAD(nf_tables_set_ops); int nft_register_set(struct nft_set_ops *ops) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&ops->list, &nf_tables_set_ops); + list_add_tail_rcu(&ops->list, &nf_tables_set_ops); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -1907,14 +1963,23 @@ EXPORT_SYMBOL_GPL(nft_register_set); void nft_unregister_set(struct nft_set_ops *ops) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&ops->list); + list_del_rcu(&ops->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_set); -static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[]) +/* + * Select a set implementation based on the data characteristics and the + * given policy. The total memory use might not be known if no size is + * given, in that case the amount of memory per element is used. + */ +static const struct nft_set_ops * +nft_select_set_ops(const struct nlattr * const nla[], + const struct nft_set_desc *desc, + enum nft_set_policies policy) { - const struct nft_set_ops *ops; + const struct nft_set_ops *ops, *bops; + struct nft_set_estimate est, best; u32 features; #ifdef CONFIG_MODULES @@ -1932,15 +1997,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const features &= NFT_SET_INTERVAL | NFT_SET_MAP; } - // FIXME: implement selection properly + bops = NULL; + best.size = ~0; + best.class = ~0; + list_for_each_entry(ops, &nf_tables_set_ops, list) { if ((ops->features & features) != features) continue; + if (!ops->estimate(desc, features, &est)) + continue; + + switch (policy) { + case NFT_SET_POL_PERFORMANCE: + if (est.class < best.class) + break; + if (est.class == best.class && est.size < best.size) + break; + continue; + case NFT_SET_POL_MEMORY: + if (est.size < best.size) + break; + if (est.size == best.size && est.class < best.class) + break; + continue; + default: + break; + } + if (!try_module_get(ops->owner)) continue; - return ops; + if (bops != NULL) + module_put(bops->owner); + + bops = ops; + best = est; } + if (bops != NULL) + return bops; + return ERR_PTR(-EOPNOTSUPP); } @@ -1953,6 +2048,13 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, + [NFTA_SET_POLICY] = { .type = NLA_U32 }, + [NFTA_SET_DESC] = { .type = NLA_NESTED }, + [NFTA_SET_ID] = { .type = NLA_U32 }, +}; + +static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { + [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 }, }; static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, @@ -1962,8 +2064,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, { struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi = NULL; - const struct nft_table *table = NULL; + struct nft_af_info *afi = NULL; + struct nft_table *table = NULL; if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); @@ -1978,6 +2080,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; } nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); @@ -1999,13 +2103,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-ENOENT); } +struct nft_set *nf_tables_set_lookup_byid(const struct net *net, + const struct nlattr *nla) +{ + struct nft_trans *trans; + u32 id = ntohl(nla_get_be32(nla)); + + list_for_each_entry(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + id == nft_trans_set_id(trans)) + return nft_trans_set(trans); + } + return ERR_PTR(-ENOENT); +} + static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) { const struct nft_set *i; const char *p; unsigned long *inuse; - unsigned int n = 0; + unsigned int n = 0, min = 0; p = strnchr(name, IFNAMSIZ, '%'); if (p != NULL) { @@ -2015,23 +2133,28 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (inuse == NULL) return -ENOMEM; - +cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; if (!sscanf(i->name, name, &tmp)) continue; - if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE) + if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE) continue; - set_bit(tmp, inuse); + set_bit(tmp - min, inuse); } n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE); + if (n >= BITS_PER_BYTE * PAGE_SIZE) { + min += BITS_PER_BYTE * PAGE_SIZE; + memset(inuse, 0, PAGE_SIZE); + goto cont; + } free_page((unsigned long)inuse); } - snprintf(set->name, sizeof(set->name), name, n); + snprintf(set->name, sizeof(set->name), name, min + n); list_for_each_entry(i, &ctx->table->sets, list) { if (!strcmp(set->name, i->name)) return -ENFILE; @@ -2044,8 +2167,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, { struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; - u32 portid = NETLINK_CB(ctx->skb).portid; - u32 seq = ctx->nlh->nlmsg_seq; + struct nlattr *desc; + u32 portid = ctx->portid; + u32 seq = ctx->seq; event |= NFNL_SUBSYS_NFTABLES << 8; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), @@ -2077,6 +2201,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + desc = nla_nest_start(skb, NFTA_SET_DESC); + if (desc == NULL) + goto nla_put_failure; + if (set->size && + nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size))) + goto nla_put_failure; + nla_nest_end(skb, desc); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -2086,19 +2218,18 @@ nla_put_failure: static int nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, - int event) + int event, gfp_t gfp_flags) { struct sk_buff *skb; - u32 portid = NETLINK_CB(ctx->skb).portid; - bool report; + u32 portid = ctx->portid; int err; - report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags); if (skb == NULL) goto err; @@ -2108,112 +2239,59 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report, - GFP_KERNEL); + err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, + ctx->report, gfp_flags); err: if (err < 0) nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); return err; } -static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) -{ - const struct nft_set *set; - unsigned int idx = 0, s_idx = cb->args[0]; - - if (cb->args[1]) - return skb->len; - - list_for_each_entry(set, &ctx->table->sets, list) { - if (idx < s_idx) - goto cont; - if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, - NLM_F_MULTI) < 0) { - cb->args[0] = idx; - goto done; - } -cont: - idx++; - } - cb->args[1] = 1; -done: - return skb->len; -} - -static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) -{ - const struct nft_set *set; - unsigned int idx, s_idx = cb->args[0]; - struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; - - if (cb->args[1]) - return skb->len; - - list_for_each_entry(table, &ctx->afi->tables, list) { - if (cur_table) { - if (cur_table != table) - continue; - - cur_table = NULL; - } - ctx->table = table; - idx = 0; - list_for_each_entry(set, &ctx->table->sets, list) { - if (idx < s_idx) - goto cont; - if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, - NLM_F_MULTI) < 0) { - cb->args[0] = idx; - cb->args[2] = (unsigned long) table; - goto done; - } -cont: - idx++; - } - } - cb->args[1] = 1; -done: - return skb->len; -} - -static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) +static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nft_set *set; unsigned int idx, s_idx = cb->args[0]; - const struct nft_af_info *afi; + struct nft_af_info *afi; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); int cur_family = cb->args[3]; + struct nft_ctx *ctx = cb->data, ctx_set; if (cb->args[1]) return skb->len; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (ctx->afi && ctx->afi != afi) + continue; + if (cur_family) { if (afi->family != cur_family) continue; cur_family = 0; } + list_for_each_entry_rcu(table, &afi->tables, list) { + if (ctx->table && ctx->table != table) + continue; - list_for_each_entry(table, &afi->tables, list) { if (cur_table) { if (cur_table != table) continue; cur_table = NULL; } - - ctx->table = table; - ctx->afi = afi; idx = 0; - list_for_each_entry(set, &ctx->table->sets, list) { + list_for_each_entry_rcu(set, &table->sets, list) { if (idx < s_idx) goto cont; - if (nf_tables_fill_set(skb, ctx, set, + + ctx_set = *ctx; + ctx_set.table = table; + ctx_set.afi = afi; + if (nf_tables_fill_set(skb, &ctx_set, set, NFT_MSG_NEWSET, NLM_F_MULTI) < 0) { cb->args[0] = idx; @@ -2221,6 +2299,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, cb->args[3] = afi->family; goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -2230,36 +2309,18 @@ cont: } cb->args[1] = 1; done: + rcu_read_unlock(); return skb->len; } -static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) +static int nf_tables_dump_sets_done(struct netlink_callback *cb) { - const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - struct nlattr *nla[NFTA_SET_MAX + 1]; - struct nft_ctx ctx; - int err, ret; - - err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX, - nft_set_policy); - if (err < 0) - return err; - - err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla); - if (err < 0) - return err; - - if (ctx.table == NULL) { - if (ctx.afi == NULL) - ret = nf_tables_dump_sets_all(&ctx, skb, cb); - else - ret = nf_tables_dump_sets_family(&ctx, skb, cb); - } else - ret = nf_tables_dump_sets_table(&ctx, skb, cb); - - return ret; + kfree(cb->data); + return 0; } +#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */ + static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2278,7 +2339,17 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_sets, + .done = nf_tables_dump_sets_done, }; + struct nft_ctx *ctx_dump; + + ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_KERNEL); + if (ctx_dump == NULL) + return -ENOMEM; + + *ctx_dump = ctx; + c.data = ctx_dump; + return netlink_dump_start(nlsk, skb, nlh, &c); } @@ -2289,6 +2360,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) @@ -2305,13 +2378,50 @@ err: return err; } +static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, + struct nft_set_desc *desc, + const struct nlattr *nla) +{ + struct nlattr *da[NFTA_SET_DESC_MAX + 1]; + int err; + + err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy); + if (err < 0) + return err; + + if (da[NFTA_SET_DESC_SIZE] != NULL) + desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE])); + + return 0; +} + +static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, + struct nft_set *set) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { + nft_trans_set_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); + set->flags |= NFT_SET_INACTIVE; + } + nft_trans_set(trans) = set; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_set_ops *ops; - const struct nft_af_info *afi; + struct nft_af_info *afi; struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_set *set; @@ -2319,14 +2429,18 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, char name[IFNAMSIZ]; unsigned int size; bool create; - u32 ktype, klen, dlen, dtype, flags; + u32 ktype, dtype, flags, policy; + struct nft_set_desc desc; int err; if (nla[NFTA_SET_TABLE] == NULL || nla[NFTA_SET_NAME] == NULL || - nla[NFTA_SET_KEY_LEN] == NULL) + nla[NFTA_SET_KEY_LEN] == NULL || + nla[NFTA_SET_ID] == NULL) return -EINVAL; + memset(&desc, 0, sizeof(desc)); + ktype = NFT_DATA_VALUE; if (nla[NFTA_SET_KEY_TYPE] != NULL) { ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); @@ -2334,8 +2448,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, return -EINVAL; } - klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); - if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data)) + desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data)) return -EINVAL; flags = 0; @@ -2347,7 +2461,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, } dtype = 0; - dlen = 0; if (nla[NFTA_SET_DATA_TYPE] != NULL) { if (!(flags & NFT_SET_MAP)) return -EINVAL; @@ -2360,15 +2473,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (dtype != NFT_DATA_VERDICT) { if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; - dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); - if (dlen == 0 || - dlen > FIELD_SIZEOF(struct nft_data, data)) + desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); + if (desc.dlen == 0 || + desc.dlen > FIELD_SIZEOF(struct nft_data, data)) return -EINVAL; } else - dlen = sizeof(struct nft_data); + desc.dlen = sizeof(struct nft_data); } else if (flags & NFT_SET_MAP) return -EINVAL; + policy = NFT_SET_POL_PERFORMANCE; + if (nla[NFTA_SET_POLICY] != NULL) + policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + + if (nla[NFTA_SET_DESC] != NULL) { + err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]); + if (err < 0) + return err; + } + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); @@ -2399,7 +2522,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (!(nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(nla); + ops = nft_select_set_ops(nla, &desc, policy); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -2420,17 +2543,22 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&set->bindings); set->ops = ops; set->ktype = ktype; - set->klen = klen; + set->klen = desc.klen; set->dtype = dtype; - set->dlen = dlen; + set->dlen = desc.dlen; set->flags = flags; + set->size = desc.size; + + err = ops->init(set, &desc, nla); + if (err < 0) + goto err2; - err = ops->init(set, nla); + err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) goto err2; - list_add_tail(&set->list, &table->sets); - nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET); + list_add_tail_rcu(&set->list, &table->sets); + table->use++; return 0; err2: @@ -2440,16 +2568,20 @@ err1: return err; } -static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +static void nft_set_destroy(struct nft_set *set) { - list_del(&set->list); - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); - set->ops->destroy(set); module_put(set->ops->owner); kfree(set); } +static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +{ + list_del_rcu(&set->list); + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); + nft_set_destroy(set); +} + static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2471,10 +2603,17 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; if (!list_empty(&set->bindings)) return -EBUSY; - nf_tables_set_destroy(&ctx, set); + err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set); + if (err < 0) + return err; + + list_del_rcu(&set->list); + ctx.table->use--; return 0; } @@ -2525,16 +2664,17 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, } bind: binding->chain = ctx->chain; - list_add_tail(&binding->list, &set->bindings); + list_add_tail_rcu(&binding->list, &set->bindings); return 0; } void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) { - list_del(&binding->list); + list_del_rcu(&binding->list); - if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && + !(set->flags & NFT_SET_INACTIVE)) nf_tables_set_destroy(ctx, set); } @@ -2552,16 +2692,18 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + bool trans) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; - const struct nft_table *table; + struct nft_af_info *afi; + struct nft_table *table; struct net *net = sock_net(skb->sk); afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); @@ -2571,6 +2713,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); + if (!trans && (table->flags & NFT_TABLE_INACTIVE)) + return -ENOENT; nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; @@ -2644,13 +2788,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla); + err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla, + false); if (err < 0) return err; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; event = NFT_MSG_NEWSETELEM; event |= NFNL_SUBSYS_NFTABLES << 8; @@ -2663,7 +2810,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = NFPROTO_UNSPEC; + nfmsg->nfgen_family = ctx.afi->family; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -2707,13 +2854,15 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int err; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); if (err < 0) return err; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_INACTIVE) + return -ENOENT; if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -2724,7 +2873,98 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, return -EOPNOTSUPP; } -static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, +static int nf_tables_fill_setelem_info(struct sk_buff *skb, + const struct nft_ctx *ctx, u32 seq, + u32 portid, int event, u16 flags, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct nlattr *nest; + int err; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = ctx->afi->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_NAME, set->name)) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + if (nest == NULL) + goto nla_put_failure; + + err = nf_tables_fill_setelem(skb, set, elem); + if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_setelem_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_elem *elem, + int event, u16 flags) +{ + struct net *net = ctx->net; + u32 portid = ctx->portid; + struct sk_buff *skb; + int err; + + if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, + set, elem); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, + int msg_type, + struct nft_set *set) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem)); + if (trans == NULL) + return NULL; + + nft_trans_elem_set(trans) = set; + return trans; +} + +static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; @@ -2732,8 +2972,12 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem elem; struct nft_set_binding *binding; enum nft_registers dreg; + struct nft_trans *trans; int err; + if (set->size && set->nelems == set->size) + return -ENFILE; + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy); if (err < 0) @@ -2786,7 +3030,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, struct nft_ctx bind_ctx = { .afi = ctx->afi, .table = ctx->table, - .chain = binding->chain, + .chain = (struct nft_chain *)binding->chain, }; err = nft_validate_data_load(&bind_ctx, dreg, @@ -2796,12 +3040,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, } } + trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); + if (trans == NULL) + goto err3; + err = set->ops->insert(set, &elem); if (err < 0) - goto err3; + goto err4; + nft_trans_elem(trans) = elem; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; +err4: + kfree(trans); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_uninit(&elem.data, d2.type); @@ -2815,35 +3067,49 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + struct net *net = sock_net(skb->sk); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; - int rem, err; + int rem, err = 0; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) + return -EINVAL; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true); if (err < 0) return err; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); - if (IS_ERR(set)) - return PTR_ERR(set); + if (IS_ERR(set)) { + if (nla[NFTA_SET_ELEM_LIST_SET_ID]) { + set = nf_tables_set_lookup_byid(net, + nla[NFTA_SET_ELEM_LIST_SET_ID]); + } + if (IS_ERR(set)) + return PTR_ERR(set); + } + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr); if (err < 0) - return err; + break; + + set->nelems++; } - return 0; + return err; } -static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, +static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc desc; struct nft_set_elem elem; + struct nft_trans *trans; int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, @@ -2867,12 +3133,18 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err2; - set->ops->remove(set, &elem); + trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); + if (trans == NULL) + goto err2; + + nft_trans_elem(trans) = elem; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); nft_data_uninit(&elem.key, NFT_DATA_VALUE); if (set->flags & NFT_SET_MAP) nft_data_uninit(&elem.data, set->dtype); + return 0; err2: nft_data_uninit(&elem.key, desc.type); err1: @@ -2886,9 +3158,12 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; - int rem, err; + int rem, err = 0; - err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) + return -EINVAL; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false); if (err < 0) return err; @@ -2901,14 +3176,16 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); if (err < 0) - return err; + break; + + set->nelems--; } - return 0; + return err; } static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_NEWTABLE] = { - .call = nf_tables_newtable, + .call_batch = nf_tables_newtable, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, @@ -2918,12 +3195,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_table_policy, }, [NFT_MSG_DELTABLE] = { - .call = nf_tables_deltable, + .call_batch = nf_tables_deltable, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, [NFT_MSG_NEWCHAIN] = { - .call = nf_tables_newchain, + .call_batch = nf_tables_newchain, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, @@ -2933,7 +3210,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_chain_policy, }, [NFT_MSG_DELCHAIN] = { - .call = nf_tables_delchain, + .call_batch = nf_tables_delchain, .attr_count = NFTA_CHAIN_MAX, .policy = nft_chain_policy, }, @@ -2953,7 +3230,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_NEWSET] = { - .call = nf_tables_newset, + .call_batch = nf_tables_newset, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, @@ -2963,12 +3240,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_policy, }, [NFT_MSG_DELSET] = { - .call = nf_tables_delset, + .call_batch = nf_tables_delset, .attr_count = NFTA_SET_MAX, .policy = nft_set_policy, }, [NFT_MSG_NEWSETELEM] = { - .call = nf_tables_newsetelem, + .call_batch = nf_tables_newsetelem, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, @@ -2978,12 +3255,282 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_DELSETELEM] = { - .call = nf_tables_delsetelem, + .call_batch = nf_tables_delsetelem, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, }, }; +static void nft_chain_commit_update(struct nft_trans *trans) +{ + struct nft_base_chain *basechain; + + if (nft_trans_chain_name(trans)[0]) + strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); + + if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN)) + return; + + basechain = nft_base_chain(trans->ctx.chain); + nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans)); + + switch (nft_trans_chain_policy(trans)) { + case NF_DROP: + case NF_ACCEPT: + basechain->policy = nft_trans_chain_policy(trans); + break; + } +} + +/* Schedule objects for release via rcu to make sure no packets are accesing + * removed rules. + */ +static void nf_tables_commit_release_rcu(struct rcu_head *rt) +{ + struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); + + switch (trans->msg_type) { + case NFT_MSG_DELTABLE: + nf_tables_table_destroy(&trans->ctx); + break; + case NFT_MSG_DELCHAIN: + nf_tables_chain_destroy(trans->ctx.chain); + break; + case NFT_MSG_DELRULE: + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + break; + case NFT_MSG_DELSET: + nft_set_destroy(nft_trans_set(trans)); + break; + } + kfree(trans); +} + +static int nf_tables_commit(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_trans *trans, *next; + struct nft_set *set; + + /* Bump generation counter, invalidate any dump in progress */ + while (++net->nft.base_seq == 0); + + /* A new generation has just started */ + net->nft.gencursor = gencursor_next(net); + + /* Make sure all packets have left the previous generation before + * purging old rules. + */ + synchronize_rcu(); + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { + if (!nft_trans_table_enable(trans)) { + nf_tables_table_disable(trans->ctx.afi, + trans->ctx.table); + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } + } else { + trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE; + } + nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELTABLE: + nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE); + break; + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain_update(trans)) + nft_chain_commit_update(trans); + else + trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE; + + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELCHAIN: + nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && + trans->ctx.chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, + trans->ctx.afi->nops); + } + break; + case NFT_MSG_NEWRULE: + nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nf_tables_rule_notify(&trans->ctx, + nft_trans_rule(trans), + NFT_MSG_NEWRULE); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELRULE: + list_del_rcu(&nft_trans_rule(trans)->list); + nf_tables_rule_notify(&trans->ctx, + nft_trans_rule(trans), + NFT_MSG_DELRULE); + break; + case NFT_MSG_NEWSET: + nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE; + /* This avoids hitting -EBUSY when deleting the table + * from the transaction. + */ + if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS && + !list_empty(&nft_trans_set(trans)->bindings)) + trans->ctx.table->use--; + + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_NEWSET, GFP_KERNEL); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSET: + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_DELSET, GFP_KERNEL); + break; + case NFT_MSG_NEWSETELEM: + nf_tables_setelem_notify(&trans->ctx, + nft_trans_elem_set(trans), + &nft_trans_elem(trans), + NFT_MSG_NEWSETELEM, 0); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSETELEM: + nf_tables_setelem_notify(&trans->ctx, + nft_trans_elem_set(trans), + &nft_trans_elem(trans), + NFT_MSG_DELSETELEM, 0); + set = nft_trans_elem_set(trans); + set->ops->get(set, &nft_trans_elem(trans)); + set->ops->remove(set, &nft_trans_elem(trans)); + nft_trans_destroy(trans); + break; + } + } + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_del(&trans->list); + trans->ctx.nla = NULL; + call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); + } + + return 0; +} + +/* Schedule objects for release via rcu to make sure no packets are accesing + * aborted rules. + */ +static void nf_tables_abort_release_rcu(struct rcu_head *rt) +{ + struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); + + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + nf_tables_table_destroy(&trans->ctx); + break; + case NFT_MSG_NEWCHAIN: + nf_tables_chain_destroy(trans->ctx.chain); + break; + case NFT_MSG_NEWRULE: + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + break; + case NFT_MSG_NEWSET: + nft_set_destroy(nft_trans_set(trans)); + break; + } + kfree(trans); +} + +static int nf_tables_abort(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_trans *trans, *next; + struct nft_set *set; + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWTABLE: + if (nft_trans_table_update(trans)) { + if (nft_trans_table_enable(trans)) { + nf_tables_table_disable(trans->ctx.afi, + trans->ctx.table); + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } + nft_trans_destroy(trans); + } else { + list_del_rcu(&trans->ctx.table->list); + } + break; + case NFT_MSG_DELTABLE: + list_add_tail_rcu(&trans->ctx.table->list, + &trans->ctx.afi->tables); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain_update(trans)) { + if (nft_trans_chain_stats(trans)) + free_percpu(nft_trans_chain_stats(trans)); + + nft_trans_destroy(trans); + } else { + trans->ctx.table->use--; + list_del_rcu(&trans->ctx.chain->list); + if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && + trans->ctx.chain->flags & NFT_BASE_CHAIN) { + nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, + trans->ctx.afi->nops); + } + } + break; + case NFT_MSG_DELCHAIN: + trans->ctx.table->use++; + list_add_tail_rcu(&trans->ctx.chain->list, + &trans->ctx.table->chains); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWRULE: + trans->ctx.chain->use--; + list_del_rcu(&nft_trans_rule(trans)->list); + break; + case NFT_MSG_DELRULE: + trans->ctx.chain->use++; + nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWSET: + trans->ctx.table->use--; + list_del_rcu(&nft_trans_set(trans)->list); + break; + case NFT_MSG_DELSET: + trans->ctx.table->use++; + list_add_tail_rcu(&nft_trans_set(trans)->list, + &trans->ctx.table->sets); + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWSETELEM: + nft_trans_elem_set(trans)->nelems--; + set = nft_trans_elem_set(trans); + set->ops->get(set, &nft_trans_elem(trans)); + set->ops->remove(set, &nft_trans_elem(trans)); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELSETELEM: + nft_trans_elem_set(trans)->nelems++; + nft_trans_destroy(trans); + break; + } + } + + list_for_each_entry_safe_reverse(trans, next, + &net->nft.commit_list, list) { + list_del(&trans->list); + trans->ctx.nla = NULL; + call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); + } + + return 0; +} + static const struct nfnetlink_subsystem nf_tables_subsys = { .name = "nf_tables", .subsys_id = NFNL_SUBSYS_NFTABLES, @@ -3371,6 +3918,7 @@ static int nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); INIT_LIST_HEAD(&net->nft.commit_list); + net->nft.base_seq = 1; return 0; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 804105391b9a..3b90eb2b2c55 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -66,20 +66,6 @@ struct nft_jumpstack { int rulenum; }; -static inline void -nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, - struct nft_jumpstack *jumpstack, unsigned int stackptr) -{ - struct nft_stats __percpu *stats; - const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this; - - rcu_read_lock_bh(); - stats = rcu_dereference(nft_base_chain(chain)->stats); - __this_cpu_inc(stats->pkts); - __this_cpu_add(stats->bytes, pkt->skb->len); - rcu_read_unlock_bh(); -} - enum nft_trace { NFT_TRACE_RULE, NFT_TRACE_RETURN, @@ -117,13 +103,14 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt, unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { - const struct nft_chain *chain = ops->priv; + const struct nft_chain *chain = ops->priv, *basechain = chain; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; - int rulenum = 0; + struct nft_stats *stats; + int rulenum; /* * Cache cursor to avoid problems in case that the cursor is updated * while traversing the ruleset. @@ -131,6 +118,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); do_chain: + rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; @@ -156,8 +144,10 @@ next_rule: switch (data[NFT_REG_VERDICT].verdict) { case NFT_BREAK: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; - /* fall through */ + continue; case NFT_CONTINUE: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); continue; } break; @@ -183,37 +173,46 @@ next_rule: jumpstack[stackptr].rule = rule; jumpstack[stackptr].rulenum = rulenum; stackptr++; - /* fall through */ + chain = data[NFT_REG_VERDICT].chain; + goto do_chain; case NFT_GOTO: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + chain = data[NFT_REG_VERDICT].chain; goto do_chain; case NFT_RETURN: if (unlikely(pkt->skb->nf_trace)) nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); - - /* fall through */ + break; case NFT_CONTINUE: + if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN))) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); break; default: WARN_ON(1); } if (stackptr > 0) { - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); - stackptr--; chain = jumpstack[stackptr].chain; rule = jumpstack[stackptr].rule; rulenum = jumpstack[stackptr].rulenum; goto next_rule; } - nft_chain_stats(chain, pkt, jumpstack, stackptr); if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY); + nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + + rcu_read_lock_bh(); + stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); + rcu_read_unlock_bh(); - return nft_base_chain(chain)->policy; + return nft_base_chain(basechain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e8138da4c14f..c138b8fbe280 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -256,15 +256,15 @@ replay: #endif { nfnl_unlock(subsys_id); - kfree_skb(nskb); - return netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(nskb); } } if (!ss->commit || !ss->abort) { nfnl_unlock(subsys_id); - kfree_skb(nskb); - return netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(skb); } while (skb->len >= nlmsg_total_size(0)) { @@ -368,14 +368,13 @@ done: static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); - struct net *net = sock_net(skb->sk); int msglen; if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) { netlink_ack(skb, nlh, -EPERM); return; } @@ -400,19 +399,17 @@ static void nfnetlink_rcv(struct sk_buff *skb) } #ifdef CONFIG_MODULES -static void nfnetlink_bind(int group) +static int nfnetlink_bind(int group) { const struct nfnetlink_subsystem *ss; int type = nfnl_group2type[group]; rcu_read_lock(); ss = nfnetlink_get_subsys(type); - if (!ss) { - rcu_read_unlock(); - request_module("nfnetlink-subsys-%d", type); - return; - } rcu_read_unlock(); + if (!ss) + request_module("nfnetlink-subsys-%d", type); + return 0; } #endif diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c7b6d466a662..3ea0eacbd970 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -32,18 +32,25 @@ static LIST_HEAD(nfnl_acct_list); struct nf_acct { atomic64_t pkts; atomic64_t bytes; + unsigned long flags; struct list_head head; atomic_t refcnt; char name[NFACCT_NAME_MAX]; struct rcu_head rcu_head; + char data[0]; }; +#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) +#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */ + static int nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; char *acct_name; + unsigned int size = 0; + u32 flags = 0; if (!tb[NFACCT_NAME]) return -EINVAL; @@ -68,15 +75,39 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, /* reset counters if you request a replacement. */ atomic64_set(&matching->pkts, 0); atomic64_set(&matching->bytes, 0); + smp_mb__before_atomic(); + /* reset overquota flag if quota is enabled. */ + if ((matching->flags & NFACCT_F_QUOTA)) + clear_bit(NFACCT_OVERQUOTA_BIT, + &matching->flags); return 0; } return -EBUSY; } - nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); + if (tb[NFACCT_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS])); + if (flags & ~NFACCT_F_QUOTA) + return -EOPNOTSUPP; + if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA) + return -EINVAL; + if (flags & NFACCT_F_OVERQUOTA) + return -EINVAL; + + size += sizeof(u64); + } + + nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL); if (nfacct == NULL) return -ENOMEM; + if (flags & NFACCT_F_QUOTA) { + u64 *quota = (u64 *)nfacct->data; + + *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA])); + nfacct->flags = flags; + } + strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); if (tb[NFACCT_BYTES]) { @@ -100,6 +131,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; + u32 old_flags; event |= NFNL_SUBSYS_ACCT << 8; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); @@ -114,9 +146,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (nla_put_string(skb, NFACCT_NAME, acct->name)) goto nla_put_failure; + old_flags = acct->flags; if (type == NFNL_MSG_ACCT_GET_CTRZERO) { pkts = atomic64_xchg(&acct->pkts, 0); bytes = atomic64_xchg(&acct->bytes, 0); + smp_mb__before_atomic(); + if (acct->flags & NFACCT_F_QUOTA) + clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags); } else { pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); @@ -125,7 +161,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) goto nla_put_failure; + if (acct->flags & NFACCT_F_QUOTA) { + u64 *quota = (u64 *)acct->data; + if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) || + nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) + goto nla_put_failure; + } nlmsg_end(skb, nlh); return skb->len; @@ -270,6 +312,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, [NFACCT_BYTES] = { .type = NLA_U64 }, [NFACCT_PKTS] = { .type = NLA_U64 }, + [NFACCT_FLAGS] = { .type = NLA_U32 }, + [NFACCT_QUOTA] = { .type = NLA_U64 }, }; static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { @@ -336,6 +380,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_update); +static void nfnl_overquota_report(struct nf_acct *nfacct) +{ + int ret; + struct sk_buff *skb; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (skb == NULL) + return; + + ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0, + nfacct); + if (ret <= 0) { + kfree_skb(skb); + return; + } + netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, + GFP_ATOMIC); +} + +int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) +{ + u64 now; + u64 *quota; + int ret = NFACCT_UNDERQUOTA; + + /* no place here if we don't have a quota */ + if (!(nfacct->flags & NFACCT_F_QUOTA)) + return NFACCT_NO_QUOTA; + + quota = (u64 *)nfacct->data; + now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ? + atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes); + + ret = now > *quota; + + if (now >= *quota && + !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) { + nfnl_overquota_report(nfacct); + } + + return ret; +} +EXPORT_SYMBOL_GPL(nfnl_acct_overquota); + static int __init nfnl_acct_init(void) { int ret; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d292c8d286eb..a11c5ff2f720 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -773,6 +773,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, static struct nf_logger nfulnl_logger __read_mostly = { .name = "nfnetlink_log", + .type = NF_LOG_TYPE_ULOG, .logfn = &nfulnl_log_packet, .me = THIS_MODULE, }; @@ -1105,6 +1106,9 @@ MODULE_DESCRIPTION("netfilter userspace logging"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG); +MODULE_ALIAS_NF_LOGGER(AF_INET, 1); +MODULE_ALIAS_NF_LOGGER(AF_INET6, 1); +MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1); module_init(nfnetlink_log_init); module_exit(nfnetlink_log_fini); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 8a779be832fb..1840989092ed 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -195,6 +195,15 @@ static void nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + struct xt_tgdtor_param par; + + par.net = ctx->net; + par.target = target; + par.targinfo = info; + par.family = ctx->afi->family; + if (par.target->destroy != NULL) + par.target->destroy(&par); module_put(target->me); } @@ -382,6 +391,15 @@ static void nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_match *match = expr->ops->data; + void *info = nft_expr_priv(expr); + struct xt_mtdtor_param par; + + par.net = ctx->net; + par.match = match; + par.matchinfo = info; + par.family = ctx->afi->family; + if (par.match->destroy != NULL) + par.match->destroy(&par); module_put(match->me); } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bd0d41e69341..cc5603016242 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -215,22 +215,14 @@ static void nft_ct_l3proto_module_put(uint8_t family) nf_ct_l3proto_module_put(family); } -static int nft_ct_init_validate_get(const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_ct_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); + int err; - if (tb[NFTA_CT_DIRECTION] != NULL) { - priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); - switch (priv->dir) { - case IP_CT_DIR_ORIGINAL: - case IP_CT_DIR_REPLY: - break; - default: - return -EINVAL; - } - } - + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (priv->key) { case NFT_CT_STATE: case NFT_CT_DIRECTION: @@ -262,55 +254,55 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr, return -EOPNOTSUPP; } - return 0; -} - -static int nft_ct_init_validate_set(uint32_t key) -{ - switch (key) { - case NFT_CT_MARK: - break; - default: - return -EOPNOTSUPP; + if (tb[NFTA_CT_DIRECTION] != NULL) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } } + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + err = nft_ct_l3proto_try_module_get(ctx->afi->family); + if (err < 0) + return err; + return 0; } -static int nft_ct_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_ct_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); int err; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); - - if (tb[NFTA_CT_DREG]) { - err = nft_ct_init_validate_get(expr, tb); - if (err < 0) - return err; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_validate_data_load(ctx, priv->dreg, NULL, - NFT_DATA_VALUE); - if (err < 0) - return err; - } else { - err = nft_ct_init_validate_set(priv->key); - if (err < 0) - return err; - - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; + switch (priv->key) { +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: + break; +#endif + default: + return -EOPNOTSUPP; } + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + err = nft_ct_l3proto_try_module_get(ctx->afi->family); if (err < 0) return err; @@ -370,7 +362,7 @@ static const struct nft_expr_ops nft_ct_get_ops = { .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), .eval = nft_ct_get_eval, - .init = nft_ct_init, + .init = nft_ct_get_init, .destroy = nft_ct_destroy, .dump = nft_ct_get_dump, }; @@ -379,7 +371,7 @@ static const struct nft_expr_ops nft_ct_set_ops = { .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), .eval = nft_ct_set_eval, - .init = nft_ct_init, + .init = nft_ct_set_init, .destroy = nft_ct_destroy, .dump = nft_ct_set_dump, }; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3b1ad876d6b0..28fb8f38e6ba 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -12,212 +12,43 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/list.h> +#include <linux/log2.h> #include <linux/jhash.h> #include <linux/netlink.h> -#include <linux/vmalloc.h> +#include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> -#define NFT_HASH_MIN_SIZE 4 - -struct nft_hash { - struct nft_hash_table __rcu *tbl; -}; - -struct nft_hash_table { - unsigned int size; - unsigned int elements; - struct nft_hash_elem __rcu *buckets[]; -}; +/* We target a hash table size of 4, element hint is 75% of final size */ +#define NFT_HASH_ELEMENT_HINT 3 struct nft_hash_elem { - struct nft_hash_elem __rcu *next; + struct rhash_head node; struct nft_data key; struct nft_data data[]; }; -#define nft_hash_for_each_entry(i, head) \ - for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next)) -#define nft_hash_for_each_entry_rcu(i, head) \ - for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next)) - -static u32 nft_hash_rnd __read_mostly; -static bool nft_hash_rnd_initted __read_mostly; - -static unsigned int nft_hash_data(const struct nft_data *data, - unsigned int hsize, unsigned int len) -{ - unsigned int h; - - h = jhash(data->data, len, nft_hash_rnd); - return h & (hsize - 1); -} - static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { - const struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_table *tbl = rcu_dereference(priv->tbl); + const struct rhashtable *priv = nft_set_priv(set); const struct nft_hash_elem *he; - unsigned int h; - - h = nft_hash_data(key, tbl->size, set->klen); - nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) { - if (nft_data_cmp(&he->key, key, set->klen)) - continue; - if (set->flags & NFT_SET_MAP) - nft_data_copy(data, he->data); - return true; - } - return false; -} - -static void nft_hash_tbl_free(const struct nft_hash_table *tbl) -{ - if (is_vmalloc_addr(tbl)) - vfree(tbl); - else - kfree(tbl); -} - -static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) -{ - struct nft_hash_table *tbl; - size_t size; - - size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN); - if (tbl == NULL) - tbl = vzalloc(size); - if (tbl == NULL) - return NULL; - tbl->size = nbuckets; - - return tbl; -} - -static void nft_hash_chain_unzip(const struct nft_set *set, - const struct nft_hash_table *ntbl, - struct nft_hash_table *tbl, unsigned int n) -{ - struct nft_hash_elem *he, *last, *next; - unsigned int h; - - he = nft_dereference(tbl->buckets[n]); - if (he == NULL) - return; - h = nft_hash_data(&he->key, ntbl->size, set->klen); - - /* Find last element of first chain hashing to bucket h */ - last = he; - nft_hash_for_each_entry(he, he->next) { - if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) - break; - last = he; - } - - /* Unlink first chain from the old table */ - RCU_INIT_POINTER(tbl->buckets[n], last->next); - /* If end of chain reached, done */ - if (he == NULL) - return; - - /* Find first element of second chain hashing to bucket h */ - next = NULL; - nft_hash_for_each_entry(he, he->next) { - if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) - continue; - next = he; - break; - } + he = rhashtable_lookup(priv, key); + if (he && set->flags & NFT_SET_MAP) + nft_data_copy(data, he->data); - /* Link the two chains */ - RCU_INIT_POINTER(last->next, next); -} - -static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv) -{ - struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; - struct nft_hash_elem *he; - unsigned int i, h; - bool complete; - - ntbl = nft_hash_tbl_alloc(tbl->size * 2); - if (ntbl == NULL) - return -ENOMEM; - - /* Link new table's buckets to first element in the old table - * hashing to the new bucket. - */ - for (i = 0; i < ntbl->size; i++) { - h = i < tbl->size ? i : i - tbl->size; - nft_hash_for_each_entry(he, tbl->buckets[h]) { - if (nft_hash_data(&he->key, ntbl->size, set->klen) != i) - continue; - RCU_INIT_POINTER(ntbl->buckets[i], he); - break; - } - } - ntbl->elements = tbl->elements; - - /* Publish new table */ - rcu_assign_pointer(priv->tbl, ntbl); - - /* Unzip interleaved hash chains */ - do { - /* Wait for readers to use new table/unzipped chains */ - synchronize_rcu(); - - complete = true; - for (i = 0; i < tbl->size; i++) { - nft_hash_chain_unzip(set, ntbl, tbl, i); - if (tbl->buckets[i] != NULL) - complete = false; - } - } while (!complete); - - nft_hash_tbl_free(tbl); - return 0; -} - -static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv) -{ - struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; - struct nft_hash_elem __rcu **pprev; - unsigned int i; - - ntbl = nft_hash_tbl_alloc(tbl->size / 2); - if (ntbl == NULL) - return -ENOMEM; - - for (i = 0; i < ntbl->size; i++) { - ntbl->buckets[i] = tbl->buckets[i]; - - for (pprev = &ntbl->buckets[i]; *pprev != NULL; - pprev = &nft_dereference(*pprev)->next) - ; - RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); - } - ntbl->elements = tbl->elements; - - /* Publish new table */ - rcu_assign_pointer(priv->tbl, ntbl); - synchronize_rcu(); - - nft_hash_tbl_free(tbl); - return 0; + return !!he; } static int nft_hash_insert(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_table *tbl = nft_dereference(priv->tbl); + struct rhashtable *priv = nft_set_priv(set); struct nft_hash_elem *he; - unsigned int size, h; + unsigned int size; if (elem->flags != 0) return -EINVAL; @@ -234,14 +65,7 @@ static int nft_hash_insert(const struct nft_set *set, if (set->flags & NFT_SET_MAP) nft_data_copy(he->data, &elem->data); - h = nft_hash_data(&he->key, tbl->size, set->klen); - RCU_INIT_POINTER(he->next, tbl->buckets[h]); - rcu_assign_pointer(tbl->buckets[h], he); - tbl->elements++; - - /* Expand table when exceeding 75% load */ - if (tbl->elements > tbl->size / 4 * 3) - nft_hash_tbl_expand(set, priv); + rhashtable_insert(priv, &he->node, GFP_KERNEL); return 0; } @@ -258,37 +82,31 @@ static void nft_hash_elem_destroy(const struct nft_set *set, static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_table *tbl = nft_dereference(priv->tbl); - struct nft_hash_elem *he, __rcu **pprev; + struct rhashtable *priv = nft_set_priv(set); + struct rhash_head *he, __rcu **pprev; pprev = elem->cookie; - he = nft_dereference((*pprev)); + he = rht_dereference((*pprev), priv); + + rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL); - RCU_INIT_POINTER(*pprev, he->next); synchronize_rcu(); kfree(he); - tbl->elements--; - - /* Shrink table beneath 30% load */ - if (tbl->elements < tbl->size * 3 / 10 && - tbl->size > NFT_HASH_MIN_SIZE) - nft_hash_tbl_shrink(set, priv); } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - const struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_table *tbl = nft_dereference(priv->tbl); - struct nft_hash_elem __rcu * const *pprev; + const struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv); + struct rhash_head __rcu * const *pprev; struct nft_hash_elem *he; - unsigned int h; + u32 h; - h = nft_hash_data(&elem->key, tbl->size, set->klen); + h = rhashtable_hashfn(priv, &elem->key, set->klen); pprev = &tbl->buckets[h]; - nft_hash_for_each_entry(he, tbl->buckets[h]) { + rht_for_each_entry_rcu(he, tbl->buckets[h], node) { if (nft_data_cmp(&he->key, &elem->key, set->klen)) { - pprev = &he->next; + pprev = &he->node.next; continue; } @@ -304,14 +122,15 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - const struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_table *tbl = nft_dereference(priv->tbl); + const struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl; const struct nft_hash_elem *he; struct nft_set_elem elem; unsigned int i; + tbl = rht_dereference_rcu(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { - nft_hash_for_each_entry(he, tbl->buckets[i]) { + rht_for_each_entry_rcu(he, tbl->buckets[i], node) { if (iter->count < iter->skip) goto cont; @@ -331,46 +150,79 @@ cont: static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) { - return sizeof(struct nft_hash); + return sizeof(struct rhashtable); +} + +static int lockdep_nfnl_lock_is_held(void) +{ + return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); } static int nft_hash_init(const struct nft_set *set, + const struct nft_set_desc *desc, const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_table *tbl; + struct rhashtable *priv = nft_set_priv(set); + struct rhashtable_params params = { + .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT, + .head_offset = offsetof(struct nft_hash_elem, node), + .key_offset = offsetof(struct nft_hash_elem, key), + .key_len = set->klen, + .hashfn = jhash, + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + .mutex_is_held = lockdep_nfnl_lock_is_held, + }; - if (unlikely(!nft_hash_rnd_initted)) { - get_random_bytes(&nft_hash_rnd, 4); - nft_hash_rnd_initted = true; - } - - tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE); - if (tbl == NULL) - return -ENOMEM; - RCU_INIT_POINTER(priv->tbl, tbl); - return 0; + return rhashtable_init(priv, ¶ms); } static void nft_hash_destroy(const struct nft_set *set) { - const struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_table *tbl = nft_dereference(priv->tbl); + const struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl; struct nft_hash_elem *he, *next; unsigned int i; - for (i = 0; i < tbl->size; i++) { - for (he = nft_dereference(tbl->buckets[i]); he != NULL; - he = next) { - next = nft_dereference(he->next); + tbl = rht_dereference(priv->tbl, priv); + for (i = 0; i < tbl->size; i++) + rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node) nft_hash_elem_destroy(set, he); - } + + rhashtable_destroy(priv); +} + +static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + unsigned int esize; + + esize = sizeof(struct nft_hash_elem); + if (features & NFT_SET_MAP) + esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); + + if (desc->size) { + est->size = sizeof(struct rhashtable) + + roundup_pow_of_two(desc->size * 4 / 3) * + sizeof(struct nft_hash_elem *) + + desc->size * esize; + } else { + /* Resizing happens when the load drops below 30% or goes + * above 75%. The average of 52.5% load (approximated by 50%) + * is used for the size estimation of the hash buckets, + * meaning we calculate two buckets per element. + */ + est->size = esize + 2 * sizeof(struct nft_hash_elem *); } - kfree(tbl); + + est->class = NFT_SET_CLASS_O_1; + + return true; } static struct nft_set_ops nft_hash_ops __read_mostly = { .privsize = nft_hash_privsize, + .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, .get = nft_hash_get, diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 10cfb156cdf4..bde05f28cf14 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -41,6 +42,8 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, + [NFTA_LOG_LEVEL] = { .type = NLA_U32 }, + [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, }; static int nft_log_init(const struct nft_ctx *ctx, @@ -50,6 +53,7 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; + int ret; nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -57,30 +61,74 @@ static int nft_log_init(const struct nft_ctx *ctx, if (priv->prefix == NULL) return -ENOMEM; nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); - } else + } else { priv->prefix = (char *)nft_log_null_prefix; + } - li->type = NF_LOG_TYPE_ULOG; + li->type = NF_LOG_TYPE_LOG; + if (tb[NFTA_LOG_LEVEL] != NULL && + tb[NFTA_LOG_GROUP] != NULL) + return -EINVAL; if (tb[NFTA_LOG_GROUP] != NULL) + li->type = NF_LOG_TYPE_ULOG; + + switch (li->type) { + case NF_LOG_TYPE_LOG: + if (tb[NFTA_LOG_LEVEL] != NULL) { + li->u.log.level = + ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); + } else { + li->u.log.level = 4; + } + if (tb[NFTA_LOG_FLAGS] != NULL) { + li->u.log.logflags = + ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); + } + break; + case NF_LOG_TYPE_ULOG: li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); + if (tb[NFTA_LOG_SNAPLEN] != NULL) { + li->u.ulog.copy_len = + ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); + } + if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { + li->u.ulog.qthreshold = + ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + } + break; + } - if (tb[NFTA_LOG_SNAPLEN] != NULL) - li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); - if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { - li->u.ulog.qthreshold = - ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + if (ctx->afi->family == NFPROTO_INET) { + ret = nf_logger_find_get(NFPROTO_IPV4, li->type); + if (ret < 0) + return ret; + + ret = nf_logger_find_get(NFPROTO_IPV6, li->type); + if (ret < 0) { + nf_logger_put(NFPROTO_IPV4, li->type); + return ret; + } + return 0; } - return 0; + return nf_logger_find_get(ctx->afi->family, li->type); } static void nft_log_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_log *priv = nft_expr_priv(expr); + struct nf_loginfo *li = &priv->loginfo; if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); + + if (ctx->afi->family == NFPROTO_INET) { + nf_logger_put(NFPROTO_IPV4, li->type); + nf_logger_put(NFPROTO_IPV6, li->type); + } else { + nf_logger_put(ctx->afi->family, li->type); + } } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -91,17 +139,33 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) if (priv->prefix != nft_log_null_prefix) if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) goto nla_put_failure; - if (li->u.ulog.group) - if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) - goto nla_put_failure; - if (li->u.ulog.copy_len) - if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, - htonl(li->u.ulog.copy_len))) + switch (li->type) { + case NF_LOG_TYPE_LOG: + if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level))) goto nla_put_failure; - if (li->u.ulog.qthreshold) - if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, - htons(li->u.ulog.qthreshold))) + + if (li->u.log.logflags) { + if (nla_put_be32(skb, NFTA_LOG_FLAGS, + htonl(li->u.log.logflags))) + goto nla_put_failure; + } + break; + case NF_LOG_TYPE_ULOG: + if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) goto nla_put_failure; + + if (li->u.ulog.copy_len) { + if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, + htonl(li->u.ulog.copy_len))) + goto nla_put_failure; + } + if (li->u.ulog.qthreshold) { + if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, + htons(li->u.ulog.qthreshold))) + goto nla_put_failure; + } + break; + } return 0; nla_put_failure: diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 7fd2bea8aa23..6404a726d17b 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return -EINVAL; set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); - if (IS_ERR(set)) - return PTR_ERR(set); + if (IS_ERR(set)) { + if (tb[NFTA_LOOKUP_SET_ID]) { + set = nf_tables_set_lookup_byid(ctx->net, + tb[NFTA_LOOKUP_SET_ID]); + } + if (IS_ERR(set)) + return PTR_ERR(set); + } priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); err = nft_validate_input_register(priv->sreg); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 425cf39af890..852b178c6ae7 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,18 +18,11 @@ #include <net/sock.h> #include <net/tcp_states.h> /* for TCP_TIME_WAIT */ #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_meta.h> -struct nft_meta { - enum nft_meta_keys key:8; - union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; - }; -}; - -static void nft_meta_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr, err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +EXPORT_SYMBOL_GPL(nft_meta_get_eval); -static void nft_meta_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; @@ -163,28 +157,24 @@ static void nft_meta_set_eval(const struct nft_expr *expr, WARN_ON(1); } } +EXPORT_SYMBOL_GPL(nft_meta_set_eval); -static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { +const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = { .type = NLA_U32 }, [NFTA_META_SREG] = { .type = NLA_U32 }, }; +EXPORT_SYMBOL_GPL(nft_meta_policy); -static int nft_meta_init_validate_set(uint32_t key) +int nft_meta_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { - switch (key) { - case NFT_META_MARK: - case NFT_META_PRIORITY: - case NFT_META_NFTRACE: - return 0; - default: - return -EOPNOTSUPP; - } -} + struct nft_meta *priv = nft_expr_priv(expr); + int err; -static int nft_meta_init_validate_get(uint32_t key) -{ - switch (key) { + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { case NFT_META_LEN: case NFT_META_PROTOCOL: case NFT_META_NFPROTO: @@ -205,39 +195,41 @@ static int nft_meta_init_validate_get(uint32_t key) #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif - return 0; + break; default: return -EOPNOTSUPP; } + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + return 0; } +EXPORT_SYMBOL_GPL(nft_meta_get_init); -static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +int nft_meta_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); int err; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - - if (tb[NFTA_META_DREG]) { - err = nft_meta_init_validate_get(priv->key); - if (err < 0) - return err; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - return nft_validate_data_load(ctx, priv->dreg, NULL, - NFT_DATA_VALUE); + switch (priv->key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: + break; + default: + return -EOPNOTSUPP; } - err = nft_meta_init_validate_set(priv->key); - if (err < 0) - return err; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); err = nft_validate_input_register(priv->sreg); if (err < 0) @@ -245,9 +237,10 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return 0; } +EXPORT_SYMBOL_GPL(nft_meta_set_init); -static int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) +int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -260,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb, nla_put_failure: return -1; } +EXPORT_SYMBOL_GPL(nft_meta_get_dump); -static int nft_meta_set_dump(struct sk_buff *skb, - const struct nft_expr *expr) +int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -276,13 +270,14 @@ static int nft_meta_set_dump(struct sk_buff *skb, nla_put_failure: return -1; } +EXPORT_SYMBOL_GPL(nft_meta_set_dump); static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_get_eval, - .init = nft_meta_init, + .init = nft_meta_get_init, .dump = nft_meta_get_dump, }; @@ -290,7 +285,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, - .init = nft_meta_init, + .init = nft_meta_set_init, .dump = nft_meta_set_dump, }; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index a0195d28bcfc..79ff58cd36dc 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -175,12 +175,14 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) - goto nla_put_failure; + if (priv->sreg_proto_min) { + if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN, + htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, + htonl(priv->sreg_proto_max))) + goto nla_put_failure; + } return 0; nla_put_failure: diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index e21d69d13506..e1836ff88199 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -18,6 +18,8 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +static DEFINE_SPINLOCK(nft_rbtree_lock); + struct nft_rbtree { struct rb_root root; }; @@ -38,6 +40,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const struct rb_node *parent = priv->root.rb_node; int d; + spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -53,6 +56,8 @@ found: goto out; if (set->flags & NFT_SET_MAP) nft_data_copy(data, rbe->data); + + spin_unlock_bh(&nft_rbtree_lock); return true; } } @@ -62,6 +67,7 @@ found: goto found; } out: + spin_unlock_bh(&nft_rbtree_lock); return false; } @@ -124,9 +130,12 @@ static int nft_rbtree_insert(const struct nft_set *set, !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(rbe->data, &elem->data); + spin_lock_bh(&nft_rbtree_lock); err = __nft_rbtree_insert(set, rbe); if (err < 0) kfree(rbe); + + spin_unlock_bh(&nft_rbtree_lock); return err; } @@ -136,7 +145,9 @@ static void nft_rbtree_remove(const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe = elem->cookie; + spin_lock_bh(&nft_rbtree_lock); rb_erase(&rbe->node, &priv->root); + spin_unlock_bh(&nft_rbtree_lock); kfree(rbe); } @@ -147,6 +158,7 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) struct nft_rbtree_elem *rbe; int d; + spin_lock_bh(&nft_rbtree_lock); while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -161,9 +173,11 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) nft_data_copy(&elem->data, rbe->data); elem->flags = rbe->flags; + spin_unlock_bh(&nft_rbtree_lock); return 0; } } + spin_unlock_bh(&nft_rbtree_lock); return -ENOENT; } @@ -176,6 +190,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set_elem elem; struct rb_node *node; + spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { if (iter->count < iter->skip) goto cont; @@ -188,11 +203,14 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, elem.flags = rbe->flags; iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) + if (iter->err < 0) { + spin_unlock_bh(&nft_rbtree_lock); return; + } cont: iter->count++; } + spin_unlock_bh(&nft_rbtree_lock); } static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) @@ -201,6 +219,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) } static int nft_rbtree_init(const struct nft_set *set, + const struct nft_set_desc *desc, const struct nlattr * const nla[]) { struct nft_rbtree *priv = nft_set_priv(set); @@ -215,15 +234,37 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct nft_rbtree_elem *rbe; struct rb_node *node; + spin_lock_bh(&nft_rbtree_lock); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_rbtree_elem_destroy(set, rbe); } + spin_unlock_bh(&nft_rbtree_lock); +} + +static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + unsigned int nsize; + + nsize = sizeof(struct nft_rbtree_elem); + if (features & NFT_SET_MAP) + nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]); + + if (desc->size) + est->size = sizeof(struct nft_rbtree) + desc->size * nsize; + else + est->size = nsize; + + est->class = NFT_SET_CLASS_O_LOG_N; + + return true; } static struct nft_set_ops nft_rbtree_ops __read_mostly = { .privsize = nft_rbtree_privsize, + .estimate = nft_rbtree_estimate, .init = nft_rbtree_init, .destroy = nft_rbtree_destroy, .insert = nft_rbtree_insert, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 227aa11e8409..47b978bc3100 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -711,28 +711,15 @@ void xt_free_table_info(struct xt_table_info *info) { int cpu; - for_each_possible_cpu(cpu) { - if (info->size <= PAGE_SIZE) - kfree(info->entries[cpu]); - else - vfree(info->entries[cpu]); - } + for_each_possible_cpu(cpu) + kvfree(info->entries[cpu]); if (info->jumpstack != NULL) { - if (sizeof(void *) * info->stacksize > PAGE_SIZE) { - for_each_possible_cpu(cpu) - vfree(info->jumpstack[cpu]); - } else { - for_each_possible_cpu(cpu) - kfree(info->jumpstack[cpu]); - } + for_each_possible_cpu(cpu) + kvfree(info->jumpstack[cpu]); + kvfree(info->jumpstack); } - if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE) - vfree(info->jumpstack); - else - kfree(info->jumpstack); - free_percpu(info->stackptr); kfree(info); diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 993de2ba89d3..3ba31c194cce 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -50,11 +50,14 @@ struct xt_led_info_internal { struct timer_list timer; }; +#define XT_LED_BLINK_DELAY 50 /* ms */ + static unsigned int led_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_led_info *ledinfo = par->targinfo; struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + unsigned long led_delay = XT_LED_BLINK_DELAY; /* * If "always blink" is enabled, and there's still some time until the @@ -62,9 +65,10 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par) */ if ((ledinfo->delay > 0) && ledinfo->always_blink && timer_pending(&ledinternal->timer)) - led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); - - led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); + led_trigger_blink_oneshot(&ledinternal->netfilter_led_trigger, + &led_delay, &led_delay, 1); + else + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); /* If there's a positive delay, start/update the timer */ if (ledinfo->delay > 0) { @@ -133,9 +137,7 @@ static int led_tg_check(const struct xt_tgchk_param *par) err = led_trigger_register(&ledinternal->netfilter_led_trigger); if (err) { - pr_warning("led_trigger_register() failed\n"); - if (err == -EEXIST) - pr_warning("Trigger name is already in use.\n"); + pr_err("Trigger name is already in use.\n"); goto exit_alloc; } diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 5ab24843370a..c13b79440ede 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -27,806 +27,6 @@ #include <linux/netfilter/xt_LOG.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/netfilter/nf_log.h> -#include <net/netfilter/xt_log.h> - -static struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 5, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static int dump_udp_header(struct sbuff *m, const struct sk_buff *skb, - u8 proto, int fragment, unsigned int offset) -{ - struct udphdr _udph; - const struct udphdr *uh; - - if (proto == IPPROTO_UDP) - /* Max length: 10 "PROTO=UDP " */ - sb_add(m, "PROTO=UDP "); - else /* Max length: 14 "PROTO=UDPLITE " */ - sb_add(m, "PROTO=UDPLITE "); - - if (fragment) - goto out; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); - if (uh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); - - return 1; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u LEN=%u ", ntohs(uh->source), ntohs(uh->dest), - ntohs(uh->len)); - -out: - return 0; -} - -static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb, - u8 proto, int fragment, unsigned int offset, - unsigned int logflags) -{ - struct tcphdr _tcph; - const struct tcphdr *th; - - /* Max length: 10 "PROTO=TCP " */ - sb_add(m, "PROTO=TCP "); - - if (fragment) - return 0; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); - if (th == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); - return 1; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); - /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & XT_LOG_TCPSEQ) - sb_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); - - /* Max length: 13 "WINDOW=65535 " */ - sb_add(m, "WINDOW=%u ", ntohs(th->window)); - /* Max length: 9 "RES=0x3C " */ - sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & - TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (th->cwr) - sb_add(m, "CWR "); - if (th->ece) - sb_add(m, "ECE "); - if (th->urg) - sb_add(m, "URG "); - if (th->ack) - sb_add(m, "ACK "); - if (th->psh) - sb_add(m, "PSH "); - if (th->rst) - sb_add(m, "RST "); - if (th->syn) - sb_add(m, "SYN "); - if (th->fin) - sb_add(m, "FIN "); - /* Max length: 11 "URGP=65535 " */ - sb_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - - if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { - u_int8_t _opt[60 - sizeof(struct tcphdr)]; - const u_int8_t *op; - unsigned int i; - unsigned int optsize = th->doff*4 - sizeof(struct tcphdr); - - op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), - optsize, _opt); - if (op == NULL) { - sb_add(m, "OPT (TRUNCATED)"); - return 1; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - - sb_add(m, ") "); - } - - return 0; -} - -static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk) -{ - if (!sk || sk->sk_state == TCP_TIME_WAIT) - return; - - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - const struct cred *cred = sk->sk_socket->file->f_cred; - sb_add(m, "UID=%u GID=%u ", - from_kuid_munged(&init_user_ns, cred->fsuid), - from_kgid_munged(&init_user_ns, cred->fsgid)); - } - read_unlock_bh(&sk->sk_callback_lock); -} - -/* One level of recursion won't kill us */ -static void dump_ipv4_packet(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, - unsigned int iphoff) -{ - struct iphdr _iph; - const struct iphdr *ih; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); - if (ih == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Important fields: - * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ - /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ - sb_add(m, "SRC=%pI4 DST=%pI4 ", - &ih->saddr, &ih->daddr); - - /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ - sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", - ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, - ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); - - /* Max length: 6 "CE DF MF " */ - if (ntohs(ih->frag_off) & IP_CE) - sb_add(m, "CE "); - if (ntohs(ih->frag_off) & IP_DF) - sb_add(m, "DF "); - if (ntohs(ih->frag_off) & IP_MF) - sb_add(m, "MF "); - - /* Max length: 11 "FRAG:65535 " */ - if (ntohs(ih->frag_off) & IP_OFFSET) - sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - - if ((logflags & XT_LOG_IPOPT) && - ih->ihl * 4 > sizeof(struct iphdr)) { - const unsigned char *op; - unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; - unsigned int i, optsize; - - optsize = ih->ihl * 4 - sizeof(struct iphdr); - op = skb_header_pointer(skb, iphoff+sizeof(_iph), - optsize, _opt); - if (op == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - sb_add(m, ") "); - } - - switch (ih->protocol) { - case IPPROTO_TCP: - if (dump_tcp_header(m, skb, ih->protocol, - ntohs(ih->frag_off) & IP_OFFSET, - iphoff+ih->ihl*4, logflags)) - return; - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - if (dump_udp_header(m, skb, ih->protocol, - ntohs(ih->frag_off) & IP_OFFSET, - iphoff+ih->ihl*4)) - return; - break; - case IPPROTO_ICMP: { - struct icmphdr _icmph; - const struct icmphdr *ich; - static const size_t required_len[NR_ICMP_TYPES+1] - = { [ICMP_ECHOREPLY] = 4, - [ICMP_DEST_UNREACH] - = 8 + sizeof(struct iphdr), - [ICMP_SOURCE_QUENCH] - = 8 + sizeof(struct iphdr), - [ICMP_REDIRECT] - = 8 + sizeof(struct iphdr), - [ICMP_ECHO] = 4, - [ICMP_TIME_EXCEEDED] - = 8 + sizeof(struct iphdr), - [ICMP_PARAMETERPROB] - = 8 + sizeof(struct iphdr), - [ICMP_TIMESTAMP] = 20, - [ICMP_TIMESTAMPREPLY] = 20, - [ICMP_ADDRESS] = 12, - [ICMP_ADDRESSREPLY] = 12 }; - - /* Max length: 11 "PROTO=ICMP " */ - sb_add(m, "PROTO=ICMP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_icmph), &_icmph); - if (ich == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (ich->type <= NR_ICMP_TYPES && - required_len[ich->type] && - skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - switch (ich->type) { - case ICMP_ECHOREPLY: - case ICMP_ECHO: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - sb_add(m, "ID=%u SEQ=%u ", - ntohs(ich->un.echo.id), - ntohs(ich->un.echo.sequence)); - break; - - case ICMP_PARAMETERPROB: - /* Max length: 14 "PARAMETER=255 " */ - sb_add(m, "PARAMETER=%u ", - ntohl(ich->un.gateway) >> 24); - break; - case ICMP_REDIRECT: - /* Max length: 24 "GATEWAY=255.255.255.255 " */ - sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); - /* Fall through */ - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_TIME_EXCEEDED: - /* Max length: 3+maxlen */ - if (!iphoff) { /* Only recurse once. */ - sb_add(m, "["); - dump_ipv4_packet(m, info, skb, - iphoff + ih->ihl*4+sizeof(_icmph)); - sb_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ich->type == ICMP_DEST_UNREACH && - ich->code == ICMP_FRAG_NEEDED) - sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu)); - } - break; - } - /* Max Length */ - case IPPROTO_AH: { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 9 "PROTO=AH " */ - sb_add(m, "PROTO=AH "); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ah = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_ahdr), &_ahdr); - if (ah == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); - break; - } - case IPPROTO_ESP: { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 10 "PROTO=ESP " */ - sb_add(m, "PROTO=ESP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - eh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_esph), &_esph); - if (eh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - sb_add(m, "SPI=0x%x ", ntohl(eh->spi)); - break; - } - /* Max length: 10 "PROTO 255 " */ - default: - sb_add(m, "PROTO=%u ", ih->protocol); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && !iphoff) - dump_sk_uid_gid(m, skb->sk); - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!iphoff && skb->mark) - sb_add(m, "MARK=0x%x ", skb->mark); - - /* Proto Max log string length */ - /* IP: 40+46+6+11+127 = 230 */ - /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ - /* UDP: 10+max(25,20) = 35 */ - /* UDPLITE: 14+max(25,20) = 39 */ - /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ - /* ESP: 10+max(25)+15 = 50 */ - /* AH: 9+max(25)+15 = 49 */ - /* unknown: 10 */ - - /* (ICMP allows recursion one level deep) */ - /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ - /* maxlen = 230+ 91 + 230 + 252 = 803 */ -} - -static void dump_ipv4_mac_header(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & XT_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - sb_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int i; - - sb_add(m, "%02x", *p++); - for (i = 1; i < dev->hard_header_len; i++, p++) - sb_add(m, ":%02x", *p); - } - sb_add(m, " "); -} - -static void -log_packet_common(struct sbuff *m, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", - '0' + loginfo->u.log.level, prefix, - in ? in->name : "", - out ? out->name : ""); -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - const struct net_device *physindev; - const struct net_device *physoutdev; - - physindev = skb->nf_bridge->physindev; - if (physindev && in != physindev) - sb_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = skb->nf_bridge->physoutdev; - if (physoutdev && out != physoutdev) - sb_add(m, "PHYSOUT=%s ", physoutdev->name); - } -#endif -} - - -static void -ipt_log_packet(struct net *net, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct sbuff *m; - - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) - return; - - m = sb_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - - if (in != NULL) - dump_ipv4_mac_header(m, loginfo, skb); - - dump_ipv4_packet(m, loginfo, skb, 0); - - sb_close(m); -} - -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -/* One level of recursion won't kill us */ -static void dump_ipv6_packet(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, unsigned int ip6hoff, - int recurse) -{ - u_int8_t currenthdr; - int fragment; - struct ipv6hdr _ip6h; - const struct ipv6hdr *ih; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); - if (ih == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); - - /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ - sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", - ntohs(ih->payload_len) + sizeof(struct ipv6hdr), - (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, - ih->hop_limit, - (ntohl(*(__be32 *)ih) & 0x000fffff)); - - fragment = 0; - ptr = ip6hoff + sizeof(struct ipv6hdr); - currenthdr = ih->nexthdr; - while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { - struct ipv6_opt_hdr _hdr; - const struct ipv6_opt_hdr *hp; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - if (hp == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 48 "OPT (...) " */ - if (logflags & XT_LOG_IPOPT) - sb_add(m, "OPT ( "); - - switch (currenthdr) { - case IPPROTO_FRAGMENT: { - struct frag_hdr _fhdr; - const struct frag_hdr *fh; - - sb_add(m, "FRAG:"); - fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), - &_fhdr); - if (fh == NULL) { - sb_add(m, "TRUNCATED "); - return; - } - - /* Max length: 6 "65535 " */ - sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); - - /* Max length: 11 "INCOMPLETE " */ - if (fh->frag_off & htons(0x0001)) - sb_add(m, "INCOMPLETE "); - - sb_add(m, "ID:%08x ", ntohl(fh->identification)); - - if (ntohs(fh->frag_off) & 0xFFF8) - fragment = 1; - - hdrlen = 8; - - break; - } - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - case IPPROTO_HOPOPTS: - if (fragment) { - if (logflags & XT_LOG_IPOPT) - sb_add(m, ")"); - return; - } - hdrlen = ipv6_optlen(hp); - break; - /* Max Length */ - case IPPROTO_AH: - if (logflags & XT_LOG_IPOPT) { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - /* Max length: 3 "AH " */ - sb_add(m, "AH "); - - if (fragment) { - sb_add(m, ")"); - return; - } - - ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), - &_ahdr); - if (ah == NULL) { - /* - * Max length: 26 "INCOMPLETE [65535 - * bytes] )" - */ - sb_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 15 "SPI=0xF1234567 */ - sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); - - } - - hdrlen = (hp->hdrlen+2)<<2; - break; - case IPPROTO_ESP: - if (logflags & XT_LOG_IPOPT) { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 4 "ESP " */ - sb_add(m, "ESP "); - - if (fragment) { - sb_add(m, ")"); - return; - } - - /* - * Max length: 26 "INCOMPLETE [65535 bytes] )" - */ - eh = skb_header_pointer(skb, ptr, sizeof(_esph), - &_esph); - if (eh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 16 "SPI=0xF1234567 )" */ - sb_add(m, "SPI=0x%x )", ntohl(eh->spi)); - - } - return; - default: - /* Max length: 20 "Unknown Ext Hdr 255" */ - sb_add(m, "Unknown Ext Hdr %u", currenthdr); - return; - } - if (logflags & XT_LOG_IPOPT) - sb_add(m, ") "); - - currenthdr = hp->nexthdr; - ptr += hdrlen; - } - - switch (currenthdr) { - case IPPROTO_TCP: - if (dump_tcp_header(m, skb, currenthdr, fragment, ptr, - logflags)) - return; - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - if (dump_udp_header(m, skb, currenthdr, fragment, ptr)) - return; - break; - case IPPROTO_ICMPV6: { - struct icmp6hdr _icmp6h; - const struct icmp6hdr *ic; - - /* Max length: 13 "PROTO=ICMPv6 " */ - sb_add(m, "PROTO=ICMPv6 "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); - if (ic == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); - return; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); - - switch (ic->icmp6_type) { - case ICMPV6_ECHO_REQUEST: - case ICMPV6_ECHO_REPLY: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - sb_add(m, "ID=%u SEQ=%u ", - ntohs(ic->icmp6_identifier), - ntohs(ic->icmp6_sequence)); - break; - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - break; - - case ICMPV6_PARAMPROB: - /* Max length: 17 "POINTER=ffffffff " */ - sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); - /* Fall through */ - case ICMPV6_DEST_UNREACH: - case ICMPV6_PKT_TOOBIG: - case ICMPV6_TIME_EXCEED: - /* Max length: 3+maxlen */ - if (recurse) { - sb_add(m, "["); - dump_ipv6_packet(m, info, skb, - ptr + sizeof(_icmp6h), 0); - sb_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) - sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu)); - } - break; - } - /* Max length: 10 "PROTO=255 " */ - default: - sb_add(m, "PROTO=%u ", currenthdr); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && recurse) - dump_sk_uid_gid(m, skb->sk); - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (recurse && skb->mark) - sb_add(m, "MARK=0x%x ", skb->mark); -} - -static void dump_ipv6_mac_header(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & XT_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - sb_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int len = dev->hard_header_len; - unsigned int i; - - if (dev->type == ARPHRD_SIT) { - p -= ETH_HLEN; - - if (p < skb->head) - p = NULL; - } - - if (p != NULL) { - sb_add(m, "%02x", *p++); - for (i = 1; i < len; i++) - sb_add(m, ":%02x", *p++); - } - sb_add(m, " "); - - if (dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, - &iph->daddr); - } - } else - sb_add(m, " "); -} - -static void -ip6t_log_packet(struct net *net, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct sbuff *m; - - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) - return; - - m = sb_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - - if (in != NULL) - dump_ipv6_mac_header(m, loginfo, skb); - - dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1); - - sb_close(m); -} -#endif static unsigned int log_tg(struct sk_buff *skb, const struct xt_action_param *par) @@ -839,17 +39,8 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - if (par->family == NFPROTO_IPV4) - ipt_log_packet(net, NFPROTO_IPV4, par->hooknum, skb, par->in, - par->out, &li, loginfo->prefix); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - ip6t_log_packet(net, NFPROTO_IPV6, par->hooknum, skb, par->in, - par->out, &li, loginfo->prefix); -#endif - else - WARN_ON_ONCE(1); - + nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, + &li, "%s", loginfo->prefix); return XT_CONTINUE; } @@ -870,7 +61,12 @@ static int log_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - return 0; + return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); +} + +static void log_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_logger_put(par->family, NF_LOG_TYPE_LOG); } static struct xt_target log_tg_regs[] __read_mostly = { @@ -880,6 +76,7 @@ static struct xt_target log_tg_regs[] __read_mostly = { .target = log_tg, .targetsize = sizeof(struct xt_log_info), .checkentry = log_tg_check, + .destroy = log_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -889,78 +86,19 @@ static struct xt_target log_tg_regs[] __read_mostly = { .target = log_tg, .targetsize = sizeof(struct xt_log_info), .checkentry = log_tg_check, + .destroy = log_tg_destroy, .me = THIS_MODULE, }, #endif }; -static struct nf_logger ipt_log_logger __read_mostly = { - .name = "ipt_LOG", - .logfn = &ipt_log_packet, - .me = THIS_MODULE, -}; - -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -static struct nf_logger ip6t_log_logger __read_mostly = { - .name = "ip6t_LOG", - .logfn = &ip6t_log_packet, - .me = THIS_MODULE, -}; -#endif - -static int __net_init log_net_init(struct net *net) -{ - nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger); -#endif - return 0; -} - -static void __net_exit log_net_exit(struct net *net) -{ - nf_log_unset(net, &ipt_log_logger); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_log_unset(net, &ip6t_log_logger); -#endif -} - -static struct pernet_operations log_net_ops = { - .init = log_net_init, - .exit = log_net_exit, -}; - static int __init log_tg_init(void) { - int ret; - - ret = register_pernet_subsys(&log_net_ops); - if (ret < 0) - goto err_pernet; - - ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); - if (ret < 0) - goto err_target; - - nf_log_register(NFPROTO_IPV4, &ipt_log_logger); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); -#endif - return 0; - -err_target: - unregister_pernet_subsys(&log_net_ops); -err_pernet: - return ret; + return xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); } static void __exit log_tg_exit(void) { - unregister_pernet_subsys(&log_net_ops); - nf_log_unregister(&ipt_log_logger); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_log_unregister(&ip6t_log_logger); -#endif xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); } diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 12d4da8e6c77..dffee9d47ec4 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -23,11 +23,12 @@ MODULE_ALIAS("ip6t_bpf"); static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; - struct sock_fprog program; + struct sock_fprog_kern program; program.len = info->bpf_program_num_elem; - program.filter = (struct sock_filter __user *) info->bpf_program; - if (sk_unattached_filter_create(&info->filter, &program)) { + program.filter = info->bpf_program; + + if (bpf_prog_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; } @@ -39,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; - return SK_RUN_FILTER(info->filter, skb); + return BPF_PROG_RUN(info->filter, skb); } static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; - sk_unattached_filter_destroy(info->filter); + bpf_prog_destroy(info->filter); } static struct xt_match bpf_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a3910fc2122b..47dc6836830a 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -104,7 +104,7 @@ struct xt_hashlimit_htable { spinlock_t lock; /* lock for list_head */ u_int32_t rnd; /* random seed for hash */ unsigned int count; /* number entries in table */ - struct timer_list timer; /* timer for gc */ + struct delayed_work gc_work; /* seq_file stuff */ struct proc_dir_entry *pde; @@ -213,7 +213,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) call_rcu_bh(&ent->rcu, dsthash_free_rcu); ht->count--; } -static void htable_gc(unsigned long htlong); +static void htable_gc(struct work_struct *work); static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) @@ -273,9 +273,9 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, } hinfo->net = net; - setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo); - hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); - add_timer(&hinfo->timer); + INIT_DEFERRABLE_WORK(&hinfo->gc_work, htable_gc); + queue_delayed_work(system_power_efficient_wq, &hinfo->gc_work, + msecs_to_jiffies(hinfo->cfg.gc_interval)); hlist_add_head(&hinfo->node, &hashlimit_net->htables); @@ -300,29 +300,30 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, { unsigned int i; - /* lock hash table and iterate over it */ - spin_lock_bh(&ht->lock); for (i = 0; i < ht->cfg.size; i++) { struct dsthash_ent *dh; struct hlist_node *n; + + spin_lock_bh(&ht->lock); hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) { if ((*select)(ht, dh)) dsthash_free(ht, dh); } + spin_unlock_bh(&ht->lock); + cond_resched(); } - spin_unlock_bh(&ht->lock); } -/* hash table garbage collector, run by timer */ -static void htable_gc(unsigned long htlong) +static void htable_gc(struct work_struct *work) { - struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong; + struct xt_hashlimit_htable *ht; + + ht = container_of(work, struct xt_hashlimit_htable, gc_work.work); htable_selective_cleanup(ht, select_gc); - /* re-add the timer accordingly */ - ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval); - add_timer(&ht->timer); + queue_delayed_work(system_power_efficient_wq, + &ht->gc_work, msecs_to_jiffies(ht->cfg.gc_interval)); } static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) @@ -341,7 +342,7 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) static void htable_destroy(struct xt_hashlimit_htable *hinfo) { - del_timer_sync(&hinfo->timer); + cancel_delayed_work_sync(&hinfo->gc_work); htable_remove_proc_entry(hinfo); htable_selective_cleanup(hinfo, select_all); kfree(hinfo->name); diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index b3be0ef21f19..8c646ed9c921 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct"); static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) { + int overquota; const struct xt_nfacct_match_info *info = par->targinfo; nfnl_acct_update(skb, info->nfacct); - return true; + overquota = nfnl_acct_overquota(skb, info->nfacct); + + return overquota == NFACCT_UNDERQUOTA ? false : true; } static int diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 1e657cf715c4..a9faae89f955 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -313,10 +313,7 @@ out: static void recent_table_free(void *addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); + kvfree(addr); } static int recent_mt_check(const struct xt_mtchk_param *par, diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h index 6efe4e5a81c6..8fd324116e6f 100644 --- a/net/netfilter/xt_repldata.h +++ b/net/netfilter/xt_repldata.h @@ -5,23 +5,35 @@ * they serve as the hanging-off data accessed through repl.data[]. */ +/* tbl has the following structure equivalent, but is C99 compliant: + * struct { + * struct type##_replace repl; + * struct type##_standard entries[nhooks]; + * struct type##_error term; + * } *tbl; + */ + #define xt_alloc_initial_table(type, typ2) ({ \ unsigned int hook_mask = info->valid_hooks; \ unsigned int nhooks = hweight32(hook_mask); \ unsigned int bytes = 0, hooknum = 0, i = 0; \ struct { \ struct type##_replace repl; \ - struct type##_standard entries[nhooks]; \ - struct type##_error term; \ - } *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \ + struct type##_standard entries[]; \ + } *tbl; \ + struct type##_error *term; \ + size_t term_offset = (offsetof(typeof(*tbl), entries[nhooks]) + \ + __alignof__(*term) - 1) & ~(__alignof__(*term) - 1); \ + tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL); \ if (tbl == NULL) \ return NULL; \ + term = (struct type##_error *)&(((char *)tbl)[term_offset]); \ strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \ - tbl->term = (struct type##_error)typ2##_ERROR_INIT; \ + *term = (struct type##_error)typ2##_ERROR_INIT; \ tbl->repl.valid_hooks = hook_mask; \ tbl->repl.num_entries = nhooks + 1; \ tbl->repl.size = nhooks * sizeof(struct type##_standard) + \ - sizeof(struct type##_error); \ + sizeof(struct type##_error); \ for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \ if (!(hook_mask & 1)) \ continue; \ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 3045a964f39c..0b4692dd1c5e 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -170,7 +170,6 @@ int netlbl_cfg_unlbl_map_add(const char *domain, #endif /* IPv6 */ default: goto cfg_unlbl_map_add_failure; - break; } entry->def.addrsel = addrmap; @@ -405,8 +404,72 @@ out_entry: * Security Attribute Functions */ +#define _CM_F_NONE 0x00000000 +#define _CM_F_ALLOC 0x00000001 +#define _CM_F_WALK 0x00000002 + +/** + * _netlbl_catmap_getnode - Get a individual node from a catmap + * @catmap: pointer to the category bitmap + * @offset: the requested offset + * @cm_flags: catmap flags, see _CM_F_* + * @gfp_flags: memory allocation flags + * + * Description: + * Iterate through the catmap looking for the node associated with @offset. + * If the _CM_F_ALLOC flag is set in @cm_flags and there is no associated node, + * one will be created and inserted into the catmap. If the _CM_F_WALK flag is + * set in @cm_flags and there is no associated node, the next highest node will + * be returned. Returns a pointer to the node on success, NULL on failure. + * + */ +static struct netlbl_lsm_catmap *_netlbl_catmap_getnode( + struct netlbl_lsm_catmap **catmap, + u32 offset, + unsigned int cm_flags, + gfp_t gfp_flags) +{ + struct netlbl_lsm_catmap *iter = *catmap; + struct netlbl_lsm_catmap *prev = NULL; + + if (iter == NULL) + goto catmap_getnode_alloc; + if (offset < iter->startbit) + goto catmap_getnode_walk; + while (iter && offset >= (iter->startbit + NETLBL_CATMAP_SIZE)) { + prev = iter; + iter = iter->next; + } + if (iter == NULL || offset < iter->startbit) + goto catmap_getnode_walk; + + return iter; + +catmap_getnode_walk: + if (cm_flags & _CM_F_WALK) + return iter; +catmap_getnode_alloc: + if (!(cm_flags & _CM_F_ALLOC)) + return NULL; + + iter = netlbl_catmap_alloc(gfp_flags); + if (iter == NULL) + return NULL; + iter->startbit = offset & ~(NETLBL_CATMAP_SIZE - 1); + + if (prev == NULL) { + iter->next = *catmap; + *catmap = iter; + } else { + iter->next = prev->next; + prev->next = iter; + } + + return iter; +} + /** - * netlbl_secattr_catmap_walk - Walk a LSM secattr catmap looking for a bit + * netlbl_catmap_walk - Walk a LSM secattr catmap looking for a bit * @catmap: the category bitmap * @offset: the offset to start searching at, in bits * @@ -415,54 +478,51 @@ out_entry: * returns the spot of the first set bit or -ENOENT if no bits are set. * */ -int netlbl_secattr_catmap_walk(struct netlbl_lsm_secattr_catmap *catmap, - u32 offset) +int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset) { - struct netlbl_lsm_secattr_catmap *iter = catmap; - u32 node_idx; - u32 node_bit; + struct netlbl_lsm_catmap *iter = catmap; + u32 idx; + u32 bit; NETLBL_CATMAP_MAPTYPE bitmap; + iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0); + if (iter == NULL) + return -ENOENT; if (offset > iter->startbit) { - while (offset >= (iter->startbit + NETLBL_CATMAP_SIZE)) { - iter = iter->next; - if (iter == NULL) - return -ENOENT; - } - node_idx = (offset - iter->startbit) / NETLBL_CATMAP_MAPSIZE; - node_bit = offset - iter->startbit - - (NETLBL_CATMAP_MAPSIZE * node_idx); + offset -= iter->startbit; + idx = offset / NETLBL_CATMAP_MAPSIZE; + bit = offset % NETLBL_CATMAP_MAPSIZE; } else { - node_idx = 0; - node_bit = 0; + idx = 0; + bit = 0; } - bitmap = iter->bitmap[node_idx] >> node_bit; + bitmap = iter->bitmap[idx] >> bit; for (;;) { if (bitmap != 0) { while ((bitmap & NETLBL_CATMAP_BIT) == 0) { bitmap >>= 1; - node_bit++; + bit++; } return iter->startbit + - (NETLBL_CATMAP_MAPSIZE * node_idx) + node_bit; + (NETLBL_CATMAP_MAPSIZE * idx) + bit; } - if (++node_idx >= NETLBL_CATMAP_MAPCNT) { + if (++idx >= NETLBL_CATMAP_MAPCNT) { if (iter->next != NULL) { iter = iter->next; - node_idx = 0; + idx = 0; } else return -ENOENT; } - bitmap = iter->bitmap[node_idx]; - node_bit = 0; + bitmap = iter->bitmap[idx]; + bit = 0; } return -ENOENT; } /** - * netlbl_secattr_catmap_walk_rng - Find the end of a string of set bits + * netlbl_catmap_walkrng - Find the end of a string of set bits * @catmap: the category bitmap * @offset: the offset to start searching at, in bits * @@ -472,57 +532,105 @@ int netlbl_secattr_catmap_walk(struct netlbl_lsm_secattr_catmap *catmap, * the end of the bitmap. * */ -int netlbl_secattr_catmap_walk_rng(struct netlbl_lsm_secattr_catmap *catmap, - u32 offset) +int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset) { - struct netlbl_lsm_secattr_catmap *iter = catmap; - u32 node_idx; - u32 node_bit; + struct netlbl_lsm_catmap *iter; + struct netlbl_lsm_catmap *prev = NULL; + u32 idx; + u32 bit; NETLBL_CATMAP_MAPTYPE bitmask; NETLBL_CATMAP_MAPTYPE bitmap; + iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0); + if (iter == NULL) + return -ENOENT; if (offset > iter->startbit) { - while (offset >= (iter->startbit + NETLBL_CATMAP_SIZE)) { - iter = iter->next; - if (iter == NULL) - return -ENOENT; - } - node_idx = (offset - iter->startbit) / NETLBL_CATMAP_MAPSIZE; - node_bit = offset - iter->startbit - - (NETLBL_CATMAP_MAPSIZE * node_idx); + offset -= iter->startbit; + idx = offset / NETLBL_CATMAP_MAPSIZE; + bit = offset % NETLBL_CATMAP_MAPSIZE; } else { - node_idx = 0; - node_bit = 0; + idx = 0; + bit = 0; } - bitmask = NETLBL_CATMAP_BIT << node_bit; + bitmask = NETLBL_CATMAP_BIT << bit; for (;;) { - bitmap = iter->bitmap[node_idx]; + bitmap = iter->bitmap[idx]; while (bitmask != 0 && (bitmap & bitmask) != 0) { bitmask <<= 1; - node_bit++; + bit++; } - if (bitmask != 0) + if (prev && idx == 0 && bit == 0) + return prev->startbit + NETLBL_CATMAP_SIZE - 1; + else if (bitmask != 0) return iter->startbit + - (NETLBL_CATMAP_MAPSIZE * node_idx) + - node_bit - 1; - else if (++node_idx >= NETLBL_CATMAP_MAPCNT) { + (NETLBL_CATMAP_MAPSIZE * idx) + bit - 1; + else if (++idx >= NETLBL_CATMAP_MAPCNT) { if (iter->next == NULL) - return iter->startbit + NETLBL_CATMAP_SIZE - 1; + return iter->startbit + NETLBL_CATMAP_SIZE - 1; + prev = iter; iter = iter->next; - node_idx = 0; + idx = 0; } bitmask = NETLBL_CATMAP_BIT; - node_bit = 0; + bit = 0; } return -ENOENT; } /** - * netlbl_secattr_catmap_setbit - Set a bit in a LSM secattr catmap - * @catmap: the category bitmap + * netlbl_catmap_getlong - Export an unsigned long bitmap + * @catmap: pointer to the category bitmap + * @offset: pointer to the requested offset + * @bitmap: the exported bitmap + * + * Description: + * Export a bitmap with an offset greater than or equal to @offset and return + * it in @bitmap. The @offset must be aligned to an unsigned long and will be + * updated on return if different from what was requested; if the catmap is + * empty at the requested offset and beyond, the @offset is set to (u32)-1. + * Returns zero on sucess, negative values on failure. + * + */ +int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap, + u32 *offset, + unsigned long *bitmap) +{ + struct netlbl_lsm_catmap *iter; + u32 off = *offset; + u32 idx; + + /* only allow aligned offsets */ + if ((off & (BITS_PER_LONG - 1)) != 0) + return -EINVAL; + + if (off < catmap->startbit) { + off = catmap->startbit; + *offset = off; + } + iter = _netlbl_catmap_getnode(&catmap, off, _CM_F_NONE, 0); + if (iter == NULL) { + *offset = (u32)-1; + return 0; + } + + if (off < iter->startbit) { + off = iter->startbit; + *offset = off; + } else + off -= iter->startbit; + + idx = off / NETLBL_CATMAP_MAPSIZE; + *bitmap = iter->bitmap[idx] >> (off % NETLBL_CATMAP_SIZE); + + return 0; +} + +/** + * netlbl_catmap_setbit - Set a bit in a LSM secattr catmap + * @catmap: pointer to the category bitmap * @bit: the bit to set * @flags: memory allocation flags * @@ -531,36 +639,27 @@ int netlbl_secattr_catmap_walk_rng(struct netlbl_lsm_secattr_catmap *catmap, * negative values on failure. * */ -int netlbl_secattr_catmap_setbit(struct netlbl_lsm_secattr_catmap *catmap, - u32 bit, - gfp_t flags) +int netlbl_catmap_setbit(struct netlbl_lsm_catmap **catmap, + u32 bit, + gfp_t flags) { - struct netlbl_lsm_secattr_catmap *iter = catmap; - u32 node_bit; - u32 node_idx; + struct netlbl_lsm_catmap *iter; + u32 idx; - while (iter->next != NULL && - bit >= (iter->startbit + NETLBL_CATMAP_SIZE)) - iter = iter->next; - if (bit >= (iter->startbit + NETLBL_CATMAP_SIZE)) { - iter->next = netlbl_secattr_catmap_alloc(flags); - if (iter->next == NULL) - return -ENOMEM; - iter = iter->next; - iter->startbit = bit & ~(NETLBL_CATMAP_SIZE - 1); - } + iter = _netlbl_catmap_getnode(catmap, bit, _CM_F_ALLOC, flags); + if (iter == NULL) + return -ENOMEM; - /* gcc always rounds to zero when doing integer division */ - node_idx = (bit - iter->startbit) / NETLBL_CATMAP_MAPSIZE; - node_bit = bit - iter->startbit - (NETLBL_CATMAP_MAPSIZE * node_idx); - iter->bitmap[node_idx] |= NETLBL_CATMAP_BIT << node_bit; + bit -= iter->startbit; + idx = bit / NETLBL_CATMAP_MAPSIZE; + iter->bitmap[idx] |= NETLBL_CATMAP_BIT << (bit % NETLBL_CATMAP_MAPSIZE); return 0; } /** - * netlbl_secattr_catmap_setrng - Set a range of bits in a LSM secattr catmap - * @catmap: the category bitmap + * netlbl_catmap_setrng - Set a range of bits in a LSM secattr catmap + * @catmap: pointer to the category bitmap * @start: the starting bit * @end: the last bit in the string * @flags: memory allocation flags @@ -570,36 +669,63 @@ int netlbl_secattr_catmap_setbit(struct netlbl_lsm_secattr_catmap *catmap, * on success, negative values on failure. * */ -int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, - u32 start, - u32 end, - gfp_t flags) +int netlbl_catmap_setrng(struct netlbl_lsm_catmap **catmap, + u32 start, + u32 end, + gfp_t flags) { - int ret_val = 0; - struct netlbl_lsm_secattr_catmap *iter = catmap; - u32 iter_max_spot; - u32 spot; - - /* XXX - This could probably be made a bit faster by combining writes - * to the catmap instead of setting a single bit each time, but for - * right now skipping to the start of the range in the catmap should - * be a nice improvement over calling the individual setbit function - * repeatedly from a loop. */ - - while (iter->next != NULL && - start >= (iter->startbit + NETLBL_CATMAP_SIZE)) - iter = iter->next; - iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE; - - for (spot = start; spot <= end && ret_val == 0; spot++) { - if (spot >= iter_max_spot && iter->next != NULL) { - iter = iter->next; - iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE; - } - ret_val = netlbl_secattr_catmap_setbit(iter, spot, flags); + int rc = 0; + u32 spot = start; + + while (rc == 0 && spot <= end) { + if (((spot & (BITS_PER_LONG - 1)) != 0) && + ((end - spot) > BITS_PER_LONG)) { + rc = netlbl_catmap_setlong(catmap, + spot, + (unsigned long)-1, + flags); + spot += BITS_PER_LONG; + } else + rc = netlbl_catmap_setbit(catmap, spot++, flags); } - return ret_val; + return rc; +} + +/** + * netlbl_catmap_setlong - Import an unsigned long bitmap + * @catmap: pointer to the category bitmap + * @offset: offset to the start of the imported bitmap + * @bitmap: the bitmap to import + * @flags: memory allocation flags + * + * Description: + * Import the bitmap specified in @bitmap into @catmap, using the offset + * in @offset. The offset must be aligned to an unsigned long. Returns zero + * on success, negative values on failure. + * + */ +int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, + u32 offset, + unsigned long bitmap, + gfp_t flags) +{ + struct netlbl_lsm_catmap *iter; + u32 idx; + + /* only allow aligned offsets */ + if ((offset & (BITS_PER_LONG - 1)) != 0) + return -EINVAL; + + iter = _netlbl_catmap_getnode(catmap, offset, _CM_F_ALLOC, flags); + if (iter == NULL) + return -ENOMEM; + + offset -= iter->startbit; + idx = offset / NETLBL_CATMAP_MAPSIZE; + iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE); + + return 0; } /* diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 894cda0206bb..479a344563d8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -58,7 +58,9 @@ #include <linux/mutex.h> #include <linux/vmalloc.h> #include <linux/if_arp.h> +#include <linux/rhashtable.h> #include <asm/cacheflush.h> +#include <linux/hash.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -100,6 +102,18 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); +/* Protects netlink socket hash table mutations */ +DEFINE_MUTEX(nl_sk_hash_lock); + +static int lockdep_nl_sk_hash_is_held(void) +{ +#ifdef CONFIG_LOCKDEP + return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1; +#else + return 1; +#endif +} + static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -110,11 +124,6 @@ static inline u32 netlink_group_mask(u32 group) return group ? 1 << (group - 1) : 0; } -static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid) -{ - return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; -} - int netlink_add_tap(struct netlink_tap *nt) { if (unlikely(nt->dev->type != ARPHRD_NETLINK)) @@ -170,7 +179,6 @@ EXPORT_SYMBOL_GPL(netlink_remove_tap); static bool netlink_filter_tap(const struct sk_buff *skb) { struct sock *sk = skb->sk; - bool pass = false; /* We take the more conservative approach and * whitelist socket protocols that may pass. @@ -184,11 +192,10 @@ static bool netlink_filter_tap(const struct sk_buff *skb) case NETLINK_FIB_LOOKUP: case NETLINK_NETFILTER: case NETLINK_GENERIC: - pass = true; - break; + return true; } - return pass; + return false; } static int __netlink_deliver_tap_skb(struct sk_buff *skb, @@ -376,7 +383,7 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, if ((int)req->nm_block_size <= 0) return -EINVAL; - if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE)) + if (!PAGE_ALIGNED(req->nm_block_size)) return -EINVAL; if (req->nm_frame_size < NL_MMAP_HDRLEN) return -EINVAL; @@ -636,7 +643,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock, while (nlk->cb_running && netlink_dump_space(nlk)) { err = netlink_dump(sk); if (err < 0) { - sk->sk_err = err; + sk->sk_err = -err; sk->sk_error_report(sk); break; } @@ -985,105 +992,48 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static bool netlink_compare(struct net *net, struct sock *sk) +struct netlink_compare_arg { - return net_eq(sock_net(sk), net); -} - -static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) -{ - struct netlink_table *table = &nl_table[protocol]; - struct nl_portid_hash *hash = &table->hash; - struct hlist_head *head; - struct sock *sk; - - read_lock(&nl_table_lock); - head = nl_portid_hashfn(hash, portid); - sk_for_each(sk, head) { - if (table->compare(net, sk) && - (nlk_sk(sk)->portid == portid)) { - sock_hold(sk); - goto found; - } - } - sk = NULL; -found: - read_unlock(&nl_table_lock); - return sk; -} + struct net *net; + u32 portid; +}; -static struct hlist_head *nl_portid_hash_zalloc(size_t size) +static bool netlink_compare(void *ptr, void *arg) { - if (size <= PAGE_SIZE) - return kzalloc(size, GFP_ATOMIC); - else - return (struct hlist_head *) - __get_free_pages(GFP_ATOMIC | __GFP_ZERO, - get_order(size)); -} + struct netlink_compare_arg *x = arg; + struct sock *sk = ptr; -static void nl_portid_hash_free(struct hlist_head *table, size_t size) -{ - if (size <= PAGE_SIZE) - kfree(table); - else - free_pages((unsigned long)table, get_order(size)); + return nlk_sk(sk)->portid == x->portid && + net_eq(sock_net(sk), x->net); } -static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) +static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, + struct net *net) { - unsigned int omask, mask, shift; - size_t osize, size; - struct hlist_head *otable, *table; - int i; - - omask = mask = hash->mask; - osize = size = (mask + 1) * sizeof(*table); - shift = hash->shift; + struct netlink_compare_arg arg = { + .net = net, + .portid = portid, + }; + u32 hash; - if (grow) { - if (++shift > hash->max_shift) - return 0; - mask = mask * 2 + 1; - size *= 2; - } + hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid)); - table = nl_portid_hash_zalloc(size); - if (!table) - return 0; - - otable = hash->table; - hash->table = table; - hash->mask = mask; - hash->shift = shift; - get_random_bytes(&hash->rnd, sizeof(hash->rnd)); - - for (i = 0; i <= omask; i++) { - struct sock *sk; - struct hlist_node *tmp; - - sk_for_each_safe(sk, tmp, &otable[i]) - __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); - } - - nl_portid_hash_free(otable, osize); - hash->rehash_time = jiffies + 10 * 60 * HZ; - return 1; + return rhashtable_lookup_compare(&table->hash, hash, + &netlink_compare, &arg); } -static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) +static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - int avg = hash->entries >> hash->shift; - - if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) - return 1; + struct netlink_table *table = &nl_table[protocol]; + struct sock *sk; - if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { - nl_portid_hash_rehash(hash, 0); - return 1; - } + rcu_read_lock(); + sk = __netlink_lookup(table, portid, net); + if (sk) + sock_hold(sk); + rcu_read_unlock(); - return 0; + return sk; } static const struct proto_ops netlink_ops; @@ -1115,22 +1065,10 @@ netlink_update_listeners(struct sock *sk) static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { struct netlink_table *table = &nl_table[sk->sk_protocol]; - struct nl_portid_hash *hash = &table->hash; - struct hlist_head *head; int err = -EADDRINUSE; - struct sock *osk; - int len; - netlink_table_grab(); - head = nl_portid_hashfn(hash, portid); - len = 0; - sk_for_each(osk, head) { - if (table->compare(net, osk) && - (nlk_sk(osk)->portid == portid)) - break; - len++; - } - if (osk) + mutex_lock(&nl_sk_hash_lock); + if (__netlink_lookup(table, portid, net)) goto err; err = -EBUSY; @@ -1138,26 +1076,31 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) goto err; err = -ENOMEM; - if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) + if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX)) goto err; - if (len && nl_portid_hash_dilute(hash, len)) - head = nl_portid_hashfn(hash, portid); - hash->entries++; nlk_sk(sk)->portid = portid; - sk_add_node(sk, head); + sock_hold(sk); + rhashtable_insert(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL); err = 0; - err: - netlink_table_ungrab(); + mutex_unlock(&nl_sk_hash_lock); return err; } static void netlink_remove(struct sock *sk) { + struct netlink_table *table; + + mutex_lock(&nl_sk_hash_lock); + table = &nl_table[sk->sk_protocol]; + if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL)) { + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); + } + mutex_unlock(&nl_sk_hash_lock); + netlink_table_grab(); - if (sk_del_node_init(sk)) - nl_table[sk->sk_protocol].hash.entries--; if (nlk_sk(sk)->subscriptions) __sk_del_bind_node(sk); netlink_table_ungrab(); @@ -1206,7 +1149,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1232,6 +1176,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; + unbind = nl_table[protocol].unbind; netlink_unlock_table(); if (err < 0) @@ -1248,6 +1193,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, nlk = nlk_sk(sock->sk); nlk->module = module; nlk->netlink_bind = bind; + nlk->netlink_unbind = unbind; out: return err; @@ -1301,6 +1247,7 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].unbind = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -1309,6 +1256,9 @@ static int netlink_release(struct socket *sock) } netlink_table_ungrab(); + /* Wait for readers to complete */ + synchronize_net(); + kfree(nlk->groups); nlk->groups = NULL; @@ -1324,30 +1274,22 @@ static int netlink_autobind(struct socket *sock) struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct netlink_table *table = &nl_table[sk->sk_protocol]; - struct nl_portid_hash *hash = &table->hash; - struct hlist_head *head; - struct sock *osk; s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; retry: cond_resched(); - netlink_table_grab(); - head = nl_portid_hashfn(hash, portid); - sk_for_each(osk, head) { - if (!table->compare(net, osk)) - continue; - if (nlk_sk(osk)->portid == portid) { - /* Bind collision, search negative portid values. */ - portid = rover--; - if (rover > -4097) - rover = -4097; - netlink_table_ungrab(); - goto retry; - } + rcu_read_lock(); + if (__netlink_lookup(table, portid, net)) { + /* Bind collision, search negative portid values. */ + portid = rover--; + if (rover > -4097) + rover = -4097; + rcu_read_unlock(); + goto retry; } - netlink_table_ungrab(); + rcu_read_unlock(); err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) @@ -1360,7 +1302,74 @@ retry: return err; } -static inline int netlink_capable(const struct socket *sock, unsigned int flag) +/** + * __netlink_ns_capable - General netlink message capability test + * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, + struct user_namespace *user_ns, int cap) +{ + return ((nsp->flags & NETLINK_SKB_DST) || + file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(__netlink_ns_capable); + +/** + * netlink_ns_capable - General netlink message capability test + * @skb: socket buffer holding a netlink command from userspace + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool netlink_ns_capable(const struct sk_buff *skb, + struct user_namespace *user_ns, int cap) +{ + return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); +} +EXPORT_SYMBOL(netlink_ns_capable); + +/** + * netlink_capable - Netlink global message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in all user namespaces. + */ +bool netlink_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, &init_user_ns, cap); +} +EXPORT_SYMBOL(netlink_capable); + +/** + * netlink_net_capable - Netlink network namespace message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap over the network namespace of + * the socket we received the message from. + */ +bool netlink_net_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); +} +EXPORT_SYMBOL(netlink_net_capable); + +static inline int netlink_allowed(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].flags & flag) || ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); @@ -1411,6 +1420,19 @@ static int netlink_realloc_groups(struct sock *sk) return err; } +static void netlink_unbind(int group, long unsigned int groups, + struct netlink_sock *nlk) +{ + int undo; + + if (!nlk->netlink_unbind) + return; + + for (undo = 0; undo < group; undo++) + if (test_bit(group, &groups)) + nlk->netlink_unbind(undo); +} + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -1419,6 +1441,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; + long unsigned int groups = nladdr->nl_groups; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; @@ -1427,45 +1450,53 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to listen multicasts */ - if (nladdr->nl_groups) { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (groups) { + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) return err; } - if (nlk->portid) { + if (nlk->portid) if (nladdr->nl_pid != nlk->portid) return -EINVAL; - } else { + + if (nlk->netlink_bind && groups) { + int group; + + for (group = 0; group < nlk->ngroups; group++) { + if (!test_bit(group, &groups)) + continue; + err = nlk->netlink_bind(group); + if (!err) + continue; + netlink_unbind(group, groups, nlk); + return err; + } + } + + if (!nlk->portid) { err = nladdr->nl_pid ? netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); - if (err) + if (err) { + netlink_unbind(nlk->ngroups - 1, groups, nlk); return err; + } } - if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) + if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) return 0; netlink_table_grab(); netlink_update_subscriptions(sk, nlk->subscriptions + - hweight32(nladdr->nl_groups) - + hweight32(groups) - hweight32(nlk->groups[0])); - nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; + nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; netlink_update_listeners(sk); netlink_table_ungrab(); - if (nlk->netlink_bind && nlk->groups[0]) { - int i; - - for (i = 0; i < nlk->ngroups; i++) { - if (test_bit(i, nlk->groups)) - nlk->netlink_bind(i); - } - } - return 0; } @@ -1490,7 +1521,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if ((nladdr->nl_groups || nladdr->nl_pid) && - !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; if (!nlk->portid) @@ -1868,25 +1899,25 @@ struct netlink_broadcast_data { void *tx_data; }; -static int do_one_broadcast(struct sock *sk, - struct netlink_broadcast_data *p) +static void do_one_broadcast(struct sock *sk, + struct netlink_broadcast_data *p) { struct netlink_sock *nlk = nlk_sk(sk); int val; if (p->exclude_sk == sk) - goto out; + return; if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) - goto out; + return; if (!net_eq(sock_net(sk), p->net)) - goto out; + return; if (p->failure) { netlink_overrun(sk); - goto out; + return; } sock_hold(sk); @@ -1924,9 +1955,6 @@ static int do_one_broadcast(struct sock *sk, p->skb2 = NULL; } sock_put(sk); - -out: - return 0; } int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, @@ -2096,20 +2124,24 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) return err; if (!val || val - 1 >= nlk->ngroups) return -EINVAL; + if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { + err = nlk->netlink_bind(val); + if (err) + return err; + } netlink_table_grab(); netlink_update_socket_mc(nlk, val, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); - - if (nlk->netlink_bind) - nlk->netlink_bind(val); + if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) + nlk->netlink_unbind(val); err = 0; break; @@ -2228,6 +2260,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; int err; struct scm_cookie scm; + u32 netlink_skb_flags = 0; if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; @@ -2247,8 +2280,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_group = ffs(addr->nl_groups); err = -EPERM; if ((dst_group || dst_portid) && - !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) goto out; + netlink_skb_flags |= NETLINK_SKB_DST; } else { dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; @@ -2278,6 +2312,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { @@ -2383,7 +2418,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { - sk->sk_err = ret; + sk->sk_err = -ret; sk->sk_error_report(sk); } } @@ -2858,14 +2893,18 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) { struct nl_seq_iter *iter = seq->private; int i, j; + struct netlink_sock *nlk; struct sock *s; loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { - struct nl_portid_hash *hash = &nl_table[i].hash; + struct rhashtable *ht = &nl_table[i].hash; + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + for (j = 0; j < tbl->size; j++) { + rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + s = (struct sock *)nlk; - for (j = 0; j <= hash->mask; j++) { - sk_for_each(s, &hash->table[j]) { if (sock_net(s) != seq_file_net(seq)) continue; if (off == pos) { @@ -2881,15 +2920,14 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) } static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nl_table_lock) { - read_lock(&nl_table_lock); + rcu_read_lock(); return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct sock *s; + struct netlink_sock *nlk; struct nl_seq_iter *iter; struct net *net; int i, j; @@ -2901,28 +2939,26 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) net = seq_file_net(seq); iter = seq->private; - s = v; - do { - s = sk_next(s); - } while (s && !nl_table[s->sk_protocol].compare(net, s)); - if (s) - return s; + nlk = v; + + rht_for_each_entry_rcu(nlk, nlk->node.next, node) + if (net_eq(sock_net((struct sock *)nlk), net)) + return nlk; i = iter->link; j = iter->hash_idx + 1; do { - struct nl_portid_hash *hash = &nl_table[i].hash; - - for (; j <= hash->mask; j++) { - s = sk_head(&hash->table[j]); + struct rhashtable *ht = &nl_table[i].hash; + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - while (s && !nl_table[s->sk_protocol].compare(net, s)) - s = sk_next(s); - if (s) { - iter->link = i; - iter->hash_idx = j; - return s; + for (; j < tbl->size; j++) { + rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + if (net_eq(sock_net((struct sock *)nlk), net)) { + iter->link = i; + iter->hash_idx = j; + return nlk; + } } } @@ -2933,9 +2969,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(nl_table_lock) { - read_unlock(&nl_table_lock); + rcu_read_unlock(); } @@ -3073,9 +3108,17 @@ static struct pernet_operations __net_initdata netlink_net_ops = { static int __init netlink_proto_init(void) { int i; - unsigned long limit; - unsigned int order; int err = proto_register(&netlink_proto, 0); + struct rhashtable_params ht_params = { + .head_offset = offsetof(struct netlink_sock, node), + .key_offset = offsetof(struct netlink_sock, portid), + .key_len = sizeof(u32), /* portid */ + .hashfn = arch_fast_hash, + .max_shift = 16, /* 64K */ + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + .mutex_is_held = lockdep_nl_sk_hash_is_held, + }; if (err != 0) goto out; @@ -3086,32 +3129,13 @@ static int __init netlink_proto_init(void) if (!nl_table) goto panic; - if (totalram_pages >= (128 * 1024)) - limit = totalram_pages >> (21 - PAGE_SHIFT); - else - limit = totalram_pages >> (23 - PAGE_SHIFT); - - order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; - limit = (1UL << order) / sizeof(struct hlist_head); - order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; - for (i = 0; i < MAX_LINKS; i++) { - struct nl_portid_hash *hash = &nl_table[i].hash; - - hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table)); - if (!hash->table) { - while (i-- > 0) - nl_portid_hash_free(nl_table[i].hash.table, - 1 * sizeof(*hash->table)); + if (rhashtable_init(&nl_table[i].hash, &ht_params) < 0) { + while (--i > 0) + rhashtable_destroy(&nl_table[i].hash); kfree(nl_table); goto panic; } - hash->max_shift = order; - hash->shift = 0; - hash->mask = 0; - hash->rehash_time = jiffies; - - nl_table[i].compare = netlink_compare; } INIT_LIST_HEAD(&netlink_tap_all); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed13a790b00e..60f631fb7087 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -1,6 +1,7 @@ #ifndef _AF_NETLINK_H #define _AF_NETLINK_H +#include <linux/rhashtable.h> #include <net/sock.h> #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -38,7 +39,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); + int (*netlink_bind)(int group); + void (*netlink_unbind)(int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -46,6 +48,8 @@ struct netlink_sock { struct netlink_ring tx_ring; atomic_t mapped; #endif /* CONFIG_NETLINK_MMAP */ + + struct rhash_head node; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) @@ -53,28 +57,16 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } -struct nl_portid_hash { - struct hlist_head *table; - unsigned long rehash_time; - - unsigned int mask; - unsigned int shift; - - unsigned int entries; - unsigned int max_shift; - - u32 rnd; -}; - struct netlink_table { - struct nl_portid_hash hash; + struct rhashtable hash; struct hlist_head mc_list; struct listeners __rcu *listeners; unsigned int flags; unsigned int groups; struct mutex *cb_mutex; struct module *module; - void (*bind)(int group); + int (*bind)(int group); + void (*unbind)(int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 1af29624b92f..7301850eb56f 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -4,6 +4,7 @@ #include <linux/netlink.h> #include <linux/sock_diag.h> #include <linux/netlink_diag.h> +#include <linux/rhashtable.h> #include "af_netlink.h" @@ -101,16 +102,20 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, int protocol, int s_num) { struct netlink_table *tbl = &nl_table[protocol]; - struct nl_portid_hash *hash = &tbl->hash; + struct rhashtable *ht = &tbl->hash; + const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; + struct netlink_sock *nlsk; struct sock *sk; int ret = 0, num = 0, i; req = nlmsg_data(cb->nlh); - for (i = 0; i <= hash->mask; i++) { - sk_for_each(sk, &hash->table[i]) { + for (i = 0; i < htbl->size; i++) { + rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) { + sk = (struct sock *)nlsk; + if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b1dcdb932a86..76393f2f4b22 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -317,7 +317,7 @@ static void genl_unregister_mc_groups(struct genl_family *family) } } -static int genl_validate_ops(struct genl_family *family) +static int genl_validate_ops(const struct genl_family *family) { const struct genl_ops *ops = family->ops; unsigned int n_ops = family->n_ops; @@ -337,10 +337,6 @@ static int genl_validate_ops(struct genl_family *family) return -EINVAL; } - /* family is not registered yet, so no locking needed */ - family->ops = ops; - family->n_ops = n_ops; - return 0; } @@ -561,7 +557,7 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EOPNOTSUPP; if ((ops->flags & GENL_ADMIN_PERM) && - !capable(CAP_NET_ADMIN)) + !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ede50d197e10..71cf1bffea06 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1418,7 +1418,7 @@ static int __init nr_proto_init(void) struct net_device *dev; sprintf(name, "nr%d", i); - dev = alloc_netdev(0, name, nr_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); if (!dev) { printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); goto fail; diff --git a/net/nfc/digital.h b/net/nfc/digital.h index 3759add68b1b..3c39c72eb038 100644 --- a/net/nfc/digital.h +++ b/net/nfc/digital.h @@ -29,6 +29,7 @@ #define DIGITAL_CMD_TG_SEND 1 #define DIGITAL_CMD_TG_LISTEN 2 #define DIGITAL_CMD_TG_LISTEN_MDAA 3 +#define DIGITAL_CMD_TG_LISTEN_MD 4 #define DIGITAL_MAX_HEADER_LEN 7 #define DIGITAL_CRC_LEN 2 @@ -71,6 +72,7 @@ static inline int digital_in_send_cmd(struct nfc_digital_dev *ddev, void digital_poll_next_tech(struct nfc_digital_dev *ddev); int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech); +int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech); int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech); int digital_in_send_iso15693_inv_req(struct nfc_digital_dev *ddev, u8 rf_tech); @@ -120,6 +122,8 @@ int digital_tg_send_dep_res(struct nfc_digital_dev *ddev, struct sk_buff *skb); int digital_tg_listen_nfca(struct nfc_digital_dev *ddev, u8 rf_tech); int digital_tg_listen_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech); +void digital_tg_recv_md_req(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp); typedef u16 (*crc_func_t)(u16, const u8 *, size_t); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index e01e15dbf1ab..009bcf317101 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -22,6 +22,8 @@ #define DIGITAL_PROTO_NFCA_RF_TECH \ (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK) +#define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK + #define DIGITAL_PROTO_NFCF_RF_TECH \ (NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK) @@ -199,6 +201,11 @@ static void digital_wq_cmd(struct work_struct *work) digital_send_cmd_complete, cmd); break; + case DIGITAL_CMD_TG_LISTEN_MD: + rc = ddev->ops->tg_listen_md(ddev, cmd->timeout, + digital_send_cmd_complete, cmd); + break; + default: pr_err("Unknown cmd type %d\n", cmd->type); return; @@ -291,12 +298,19 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) 500, digital_tg_recv_atr_req, NULL); } +static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech) +{ + return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MD, NULL, NULL, 500, + digital_tg_recv_md_req, NULL); +} + int digital_target_found(struct nfc_digital_dev *ddev, struct nfc_target *target, u8 protocol) { int rc; u8 framing; u8 rf_tech; + u8 poll_tech_count; int (*check_crc)(struct sk_buff *skb); void (*add_crc)(struct sk_buff *skb); @@ -345,6 +359,12 @@ int digital_target_found(struct nfc_digital_dev *ddev, add_crc = digital_skb_add_crc_a; break; + case NFC_PROTO_ISO14443_B: + framing = NFC_DIGITAL_FRAMING_NFCB_T4T; + check_crc = digital_skb_check_crc_b; + add_crc = digital_skb_add_crc_b; + break; + default: pr_err("Invalid protocol %d\n", protocol); return -EINVAL; @@ -367,17 +387,23 @@ int digital_target_found(struct nfc_digital_dev *ddev, return rc; target->supported_protocols = (1 << protocol); - rc = nfc_targets_found(ddev->nfc_dev, target, 1); - if (rc) - return rc; + poll_tech_count = ddev->poll_tech_count; ddev->poll_tech_count = 0; + rc = nfc_targets_found(ddev->nfc_dev, target, 1); + if (rc) { + ddev->poll_tech_count = poll_tech_count; + return rc; + } + return 0; } void digital_poll_next_tech(struct nfc_digital_dev *ddev) { + u8 rand_mod; + digital_switch_rf(ddev, 0); mutex_lock(&ddev->poll_lock); @@ -387,8 +413,8 @@ void digital_poll_next_tech(struct nfc_digital_dev *ddev) return; } - ddev->poll_tech_index = (ddev->poll_tech_index + 1) % - ddev->poll_tech_count; + get_random_bytes(&rand_mod, sizeof(rand_mod)); + ddev->poll_tech_index = rand_mod % ddev->poll_tech_count; mutex_unlock(&ddev->poll_lock); @@ -475,6 +501,10 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_in_send_sens_req); + if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH) + digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B, + digital_in_send_sensb_req); + if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_in_send_sensf_req); @@ -491,6 +521,9 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, if (ddev->ops->tg_listen_mdaa) { digital_add_poll_tech(ddev, 0, digital_tg_listen_mdaa); + } else if (ddev->ops->tg_listen_md) { + digital_add_poll_tech(ddev, 0, + digital_tg_listen_md); } else { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_tg_listen_nfca); @@ -635,7 +668,8 @@ static void digital_in_send_complete(struct nfc_digital_dev *ddev, void *arg, goto done; } - if (ddev->curr_protocol == NFC_PROTO_ISO14443) { + if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || + (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_pull_sod(ddev, resp); if (rc) goto done; @@ -676,7 +710,8 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, goto exit; } - if (ddev->curr_protocol == NFC_PROTO_ISO14443) { + if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || + (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_push_sod(ddev, skb); if (rc) goto exit; @@ -716,7 +751,7 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, if (!ops->in_configure_hw || !ops->in_send_cmd || !ops->tg_listen || !ops->tg_configure_hw || !ops->tg_send_cmd || !ops->abort_cmd || - !ops->switch_rf) + !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech)) return NULL; ddev = kzalloc(sizeof(struct nfc_digital_dev), GFP_KERNEL); @@ -747,6 +782,8 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, ddev->protocols |= NFC_PROTO_ISO15693_MASK; if (supported_protocols & NFC_PROTO_ISO14443_MASK) ddev->protocols |= NFC_PROTO_ISO14443_MASK; + if (supported_protocols & NFC_PROTO_ISO14443_B_MASK) + ddev->protocols |= NFC_PROTO_ISO14443_B_MASK; ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN; ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN; diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c index d4ed25ff723f..e1638dab076d 100644 --- a/net/nfc/digital_dep.c +++ b/net/nfc/digital_dep.c @@ -224,9 +224,8 @@ int digital_in_send_atr_req(struct nfc_digital_dev *ddev, ddev->skb_add_crc(skb); - digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res, target); - - return 0; + return digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res, + target); } static int digital_in_send_rtox(struct nfc_digital_dev *ddev, @@ -458,12 +457,10 @@ static void digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg, pr_err("Received a ACK/NACK PDU\n"); rc = -EINVAL; goto exit; - break; case DIGITAL_NFC_DEP_PFB_SUPERVISOR_PDU: pr_err("Received a SUPERVISOR PDU\n"); rc = -EINVAL; goto exit; - break; } skb_pull(resp, size); @@ -674,6 +671,7 @@ void digital_tg_recv_atr_req(struct nfc_digital_dev *ddev, void *arg, int rc; struct digital_atr_req *atr_req; size_t gb_len, min_size; + u8 poll_tech_count; if (IS_ERR(resp)) { rc = PTR_ERR(resp); @@ -731,12 +729,16 @@ void digital_tg_recv_atr_req(struct nfc_digital_dev *ddev, void *arg, goto exit; gb_len = resp->len - sizeof(struct digital_atr_req); + + poll_tech_count = ddev->poll_tech_count; + ddev->poll_tech_count = 0; + rc = nfc_tm_activated(ddev->nfc_dev, NFC_PROTO_NFC_DEP_MASK, NFC_COMM_PASSIVE, atr_req->gb, gb_len); - if (rc) + if (rc) { + ddev->poll_tech_count = poll_tech_count; goto exit; - - ddev->poll_tech_count = 0; + } rc = 0; exit: diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 278c3fed27e0..fb58ed2dd41d 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -41,6 +41,24 @@ #define DIGITAL_MIFARE_READ_RES_LEN 16 #define DIGITAL_MIFARE_ACK_RES 0x0A +#define DIGITAL_CMD_SENSB_REQ 0x05 +#define DIGITAL_SENSB_ADVANCED BIT(5) +#define DIGITAL_SENSB_EXTENDED BIT(4) +#define DIGITAL_SENSB_ALLB_REQ BIT(3) +#define DIGITAL_SENSB_N(n) ((n) & 0x7) + +#define DIGITAL_CMD_SENSB_RES 0x50 + +#define DIGITAL_CMD_ATTRIB_REQ 0x1D +#define DIGITAL_ATTRIB_P1_TR0_DEFAULT (0x0 << 6) +#define DIGITAL_ATTRIB_P1_TR1_DEFAULT (0x0 << 4) +#define DIGITAL_ATTRIB_P1_SUPRESS_EOS BIT(3) +#define DIGITAL_ATTRIB_P1_SUPRESS_SOS BIT(2) +#define DIGITAL_ATTRIB_P2_LISTEN_POLL_1 (0x0 << 6) +#define DIGITAL_ATTRIB_P2_POLL_LISTEN_1 (0x0 << 4) +#define DIGITAL_ATTRIB_P2_MAX_FRAME_256 0x8 +#define DIGITAL_ATTRIB_P4_DID(n) ((n) & 0xf) + #define DIGITAL_CMD_SENSF_REQ 0x00 #define DIGITAL_CMD_SENSF_RES 0x01 @@ -75,6 +93,7 @@ static const u8 digital_ats_fsc[] = { }; #define DIGITAL_ATS_FSCI(t0) ((t0) & 0x0F) +#define DIGITAL_SENSB_FSCI(pi2) (((pi2) & 0xF0) >> 4) #define DIGITAL_ATS_MAX_FSC 256 #define DIGITAL_RATS_BYTE1 0xE0 @@ -92,6 +111,32 @@ struct digital_sel_req { u8 bcc; } __packed; +struct digital_sensb_req { + u8 cmd; + u8 afi; + u8 param; +} __packed; + +struct digital_sensb_res { + u8 cmd; + u8 nfcid0[4]; + u8 app_data[4]; + u8 proto_info[3]; +} __packed; + +struct digital_attrib_req { + u8 cmd; + u8 nfcid0[4]; + u8 param1; + u8 param2; + u8 param3; + u8 param4; +} __packed; + +struct digital_attrib_res { + u8 mbli_did; +} __packed; + struct digital_sensf_req { u8 cmd; u8 sc1; @@ -273,6 +318,8 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg, if (DIGITAL_SEL_RES_IS_T2T(sel_res)) { nfc_proto = NFC_PROTO_MIFARE; + } else if (DIGITAL_SEL_RES_IS_NFC_DEP(sel_res)) { + nfc_proto = NFC_PROTO_NFC_DEP; } else if (DIGITAL_SEL_RES_IS_T4T(sel_res)) { rc = digital_in_send_rats(ddev, target); if (rc) @@ -282,8 +329,6 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg, * done when receiving the ATS */ goto exit_free_skb; - } else if (DIGITAL_SEL_RES_IS_NFC_DEP(sel_res)) { - nfc_proto = NFC_PROTO_NFC_DEP; } else { rc = -EOPNOTSUPP; goto exit; @@ -531,6 +576,175 @@ int digital_in_recv_mifare_res(struct sk_buff *resp) return -EIO; } +static void digital_in_recv_attrib_res(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp) +{ + struct nfc_target *target = arg; + struct digital_attrib_res *attrib_res; + int rc; + + if (IS_ERR(resp)) { + rc = PTR_ERR(resp); + resp = NULL; + goto exit; + } + + if (resp->len < sizeof(*attrib_res)) { + PROTOCOL_ERR("12.6.2"); + rc = -EIO; + goto exit; + } + + attrib_res = (struct digital_attrib_res *)resp->data; + + if (attrib_res->mbli_did & 0x0f) { + PROTOCOL_ERR("12.6.2.1"); + rc = -EIO; + goto exit; + } + + rc = digital_target_found(ddev, target, NFC_PROTO_ISO14443_B); + +exit: + dev_kfree_skb(resp); + kfree(target); + + if (rc) + digital_poll_next_tech(ddev); +} + +static int digital_in_send_attrib_req(struct nfc_digital_dev *ddev, + struct nfc_target *target, + struct digital_sensb_res *sensb_res) +{ + struct digital_attrib_req *attrib_req; + struct sk_buff *skb; + int rc; + + skb = digital_skb_alloc(ddev, sizeof(*attrib_req)); + if (!skb) + return -ENOMEM; + + attrib_req = (struct digital_attrib_req *)skb_put(skb, + sizeof(*attrib_req)); + + attrib_req->cmd = DIGITAL_CMD_ATTRIB_REQ; + memcpy(attrib_req->nfcid0, sensb_res->nfcid0, + sizeof(attrib_req->nfcid0)); + attrib_req->param1 = DIGITAL_ATTRIB_P1_TR0_DEFAULT | + DIGITAL_ATTRIB_P1_TR1_DEFAULT; + attrib_req->param2 = DIGITAL_ATTRIB_P2_LISTEN_POLL_1 | + DIGITAL_ATTRIB_P2_POLL_LISTEN_1 | + DIGITAL_ATTRIB_P2_MAX_FRAME_256; + attrib_req->param3 = sensb_res->proto_info[1] & 0x07; + attrib_req->param4 = DIGITAL_ATTRIB_P4_DID(0); + + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_attrib_res, + target); + if (rc) + kfree_skb(skb); + + return rc; +} + +static void digital_in_recv_sensb_res(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp) +{ + struct nfc_target *target = NULL; + struct digital_sensb_res *sensb_res; + u8 fsci; + int rc; + + if (IS_ERR(resp)) { + rc = PTR_ERR(resp); + resp = NULL; + goto exit; + } + + if (resp->len != sizeof(*sensb_res)) { + PROTOCOL_ERR("5.6.2.1"); + rc = -EIO; + goto exit; + } + + sensb_res = (struct digital_sensb_res *)resp->data; + + if (sensb_res->cmd != DIGITAL_CMD_SENSB_RES) { + PROTOCOL_ERR("5.6.2"); + rc = -EIO; + goto exit; + } + + if (!(sensb_res->proto_info[1] & BIT(0))) { + PROTOCOL_ERR("5.6.2.12"); + rc = -EIO; + goto exit; + } + + if (sensb_res->proto_info[1] & BIT(3)) { + PROTOCOL_ERR("5.6.2.16"); + rc = -EIO; + goto exit; + } + + fsci = DIGITAL_SENSB_FSCI(sensb_res->proto_info[1]); + if (fsci >= 8) + ddev->target_fsc = DIGITAL_ATS_MAX_FSC; + else + ddev->target_fsc = digital_ats_fsc[fsci]; + + target = kzalloc(sizeof(struct nfc_target), GFP_KERNEL); + if (!target) { + rc = -ENOMEM; + goto exit; + } + + rc = digital_in_send_attrib_req(ddev, target, sensb_res); + +exit: + dev_kfree_skb(resp); + + if (rc) { + kfree(target); + digital_poll_next_tech(ddev); + } +} + +int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech) +{ + struct digital_sensb_req *sensb_req; + struct sk_buff *skb; + int rc; + + rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, + NFC_DIGITAL_RF_TECH_106B); + if (rc) + return rc; + + rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_NFCB); + if (rc) + return rc; + + skb = digital_skb_alloc(ddev, sizeof(*sensb_req)); + if (!skb) + return -ENOMEM; + + sensb_req = (struct digital_sensb_req *)skb_put(skb, + sizeof(*sensb_req)); + + sensb_req->cmd = DIGITAL_CMD_SENSB_REQ; + sensb_req->afi = 0x00; /* All families and sub-families */ + sensb_req->param = DIGITAL_SENSB_N(0); + + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sensb_res, + NULL); + if (rc) + kfree_skb(skb); + + return rc; +} + static void digital_in_recv_sensf_res(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { @@ -730,6 +944,13 @@ static int digital_tg_send_sel_res(struct nfc_digital_dev *ddev) if (!DIGITAL_DRV_CAPS_TG_CRC(ddev)) digital_skb_add_crc_a(skb); + rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_NFCA_ANTICOL_COMPLETE); + if (rc) { + kfree_skb(skb); + return rc; + } + rc = digital_tg_send_cmd(ddev, skb, 300, digital_tg_recv_atr_req, NULL); if (rc) @@ -788,6 +1009,13 @@ static int digital_tg_send_sdd_res(struct nfc_digital_dev *ddev) for (i = 0; i < 4; i++) sdd_res->bcc ^= sdd_res->nfcid1[i]; + rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_NFCA_STANDARD_WITH_CRC_A); + if (rc) { + kfree_skb(skb); + return rc; + } + rc = digital_tg_send_cmd(ddev, skb, 300, digital_tg_recv_sel_req, NULL); if (rc) @@ -840,6 +1068,13 @@ static int digital_tg_send_sens_res(struct nfc_digital_dev *ddev) sens_res[0] = (DIGITAL_SENS_RES_NFC_DEP >> 8) & 0xFF; sens_res[1] = DIGITAL_SENS_RES_NFC_DEP & 0xFF; + rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_NFCA_STANDARD); + if (rc) { + kfree_skb(skb); + return rc; + } + rc = digital_tg_send_cmd(ddev, skb, 300, digital_tg_recv_sdd_req, NULL); if (rc) @@ -877,6 +1112,18 @@ exit: dev_kfree_skb(resp); } +static void digital_tg_recv_atr_or_sensf_req(struct nfc_digital_dev *ddev, + void *arg, struct sk_buff *resp) +{ + if (!IS_ERR(resp) && (resp->len >= 2) && + (resp->data[1] == DIGITAL_CMD_SENSF_REQ)) + digital_tg_recv_sensf_req(ddev, arg, resp); + else + digital_tg_recv_atr_req(ddev, arg, resp); + + return; +} + static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, struct digital_sensf_req *sensf_req) { @@ -887,7 +1134,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, size = sizeof(struct digital_sensf_res); - if (sensf_req->rc != DIGITAL_SENSF_REQ_RC_NONE) + if (sensf_req->rc == DIGITAL_SENSF_REQ_RC_NONE) size -= sizeof(sensf_res->rd); skb = digital_skb_alloc(ddev, size); @@ -922,7 +1169,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev, digital_skb_add_crc_f(skb); rc = digital_tg_send_cmd(ddev, skb, 300, - digital_tg_recv_atr_req, NULL); + digital_tg_recv_atr_or_sensf_req, NULL); if (rc) kfree_skb(skb); @@ -971,33 +1218,48 @@ exit: dev_kfree_skb(resp); } -int digital_tg_listen_nfca(struct nfc_digital_dev *ddev, u8 rf_tech) +static int digital_tg_config_nfca(struct nfc_digital_dev *ddev) { int rc; - rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, rf_tech); + rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, + NFC_DIGITAL_RF_TECH_106A); if (rc) return rc; - rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, - NFC_DIGITAL_FRAMING_NFCA_NFC_DEP); + return digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_NFCA_NFC_DEP); +} + +int digital_tg_listen_nfca(struct nfc_digital_dev *ddev, u8 rf_tech) +{ + int rc; + + rc = digital_tg_config_nfca(ddev); if (rc) return rc; return digital_tg_listen(ddev, 300, digital_tg_recv_sens_req, NULL); } -int digital_tg_listen_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech) +static int digital_tg_config_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech) { int rc; - u8 *nfcid2; rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, rf_tech); if (rc) return rc; - rc = digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, - NFC_DIGITAL_FRAMING_NFCF_NFC_DEP); + return digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, + NFC_DIGITAL_FRAMING_NFCF_NFC_DEP); +} + +int digital_tg_listen_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech) +{ + int rc; + u8 *nfcid2; + + rc = digital_tg_config_nfcf(ddev, rf_tech); if (rc) return rc; @@ -1011,3 +1273,43 @@ int digital_tg_listen_nfcf(struct nfc_digital_dev *ddev, u8 rf_tech) return digital_tg_listen(ddev, 300, digital_tg_recv_sensf_req, nfcid2); } + +void digital_tg_recv_md_req(struct nfc_digital_dev *ddev, void *arg, + struct sk_buff *resp) +{ + u8 rf_tech; + int rc; + + if (IS_ERR(resp)) { + resp = NULL; + goto exit_free_skb; + } + + rc = ddev->ops->tg_get_rf_tech(ddev, &rf_tech); + if (rc) + goto exit_free_skb; + + switch (rf_tech) { + case NFC_DIGITAL_RF_TECH_106A: + rc = digital_tg_config_nfca(ddev); + if (rc) + goto exit_free_skb; + digital_tg_recv_sens_req(ddev, arg, resp); + break; + case NFC_DIGITAL_RF_TECH_212F: + case NFC_DIGITAL_RF_TECH_424F: + rc = digital_tg_config_nfcf(ddev, rf_tech); + if (rc) + goto exit_free_skb; + digital_tg_recv_sensf_req(ddev, arg, resp); + break; + default: + goto exit_free_skb; + } + + return; + +exit_free_skb: + digital_poll_next_tech(ddev); + dev_kfree_skb(resp); +} diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index a9f4d2e62d8d..677d24bb70f8 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -26,6 +26,8 @@ #include "hci.h" +#define MAX_FWI 4949 + static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, const u8 *param, size_t param_len, data_exchange_cb_t cb, void *cb_context) @@ -37,7 +39,7 @@ static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, * for all commands? */ return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd, - param, param_len, cb, cb_context, 3000); + param, param_len, cb, cb_context, MAX_FWI); } /* @@ -82,7 +84,7 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, NFC_HCI_HCP_COMMAND, cmd, param, param_len, nfc_hci_execute_cb, &hcp_ew, - 3000); + MAX_FWI); if (hcp_ew.exec_result < 0) return hcp_ew.exec_result; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d45b638e77c7..117708263ced 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -225,7 +225,7 @@ int nfc_hci_target_discovered(struct nfc_hci_dev *hdev, u8 gate) goto exit; } - targets->sens_res = be16_to_cpu(*(u16 *)atqa_skb->data); + targets->sens_res = be16_to_cpu(*(__be16 *)atqa_skb->data); targets->sel_res = sak_skb->data[0]; r = nfc_hci_get_param(hdev, NFC_HCI_RF_READER_A_GATE, @@ -380,34 +380,31 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev) if (r < 0) goto disconnect_all; - if (skb->len && skb->len == strlen(hdev->init_data.session_id)) - if (memcmp(hdev->init_data.session_id, skb->data, - skb->len) == 0) { - /* TODO ELa: restore gate<->pipe table from - * some TBD location. - * note: it doesn't seem possible to get the chip - * currently open gate/pipe table. - * It is only possible to obtain the supported - * gate list. - */ + if (skb->len && skb->len == strlen(hdev->init_data.session_id) && + (memcmp(hdev->init_data.session_id, skb->data, + skb->len) == 0) && hdev->ops->load_session) { + /* Restore gate<->pipe table from some proprietary location. */ - /* goto exit - * For now, always do a full initialization */ - } + r = hdev->ops->load_session(hdev); - r = nfc_hci_disconnect_all_gates(hdev); - if (r < 0) - goto exit; + if (r < 0) + goto disconnect_all; + } else { - r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, - hdev->init_data.gates); - if (r < 0) - goto disconnect_all; + r = nfc_hci_disconnect_all_gates(hdev); + if (r < 0) + goto exit; - r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, - NFC_HCI_ADMIN_SESSION_IDENTITY, - hdev->init_data.session_id, - strlen(hdev->init_data.session_id)); + r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, + hdev->init_data.gates); + if (r < 0) + goto disconnect_all; + + r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, + NFC_HCI_ADMIN_SESSION_IDENTITY, + hdev->init_data.session_id, + strlen(hdev->init_data.session_id)); + } if (r == 0) goto exit; @@ -556,8 +553,11 @@ static void hci_stop_poll(struct nfc_dev *nfc_dev) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, - NFC_HCI_EVT_END_OPERATION, NULL, 0); + if (hdev->ops->stop_poll) + hdev->ops->stop_poll(hdev); + else + nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, + NFC_HCI_EVT_END_OPERATION, NULL, 0); } static int hci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index bec6ed15f503..a3ad69a4c648 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -387,7 +387,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) __net_timestamp(skb); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX); + nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX); return nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index b4671958fcf9..51e788797317 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -680,16 +680,17 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, continue; if (skb_copy == NULL) { - skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE, - GFP_ATOMIC); + skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC, true); if (skb_copy == NULL) continue; - data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE); + data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); data[0] = local->dev ? local->dev->idx : 0xFF; - data[1] = direction; + data[1] = direction & 0x01; + data[1] |= (RAW_PAYLOAD_LLCP << 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); @@ -747,7 +748,7 @@ static void nfc_llcp_tx_work(struct work_struct *work) __net_timestamp(skb); nfc_llcp_send_to_raw_sock(local, skb, - NFC_LLCP_DIRECTION_TX); + NFC_DIRECTION_TX); ret = nfc_data_exchange(local->dev, local->target_idx, skb, nfc_llcp_recv, local); @@ -1476,7 +1477,7 @@ static void nfc_llcp_rx_work(struct work_struct *work) __net_timestamp(skb); - nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); + nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX); nfc_llcp_rx_skb(local, skb); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6c34ac978501..2b400e1a8695 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -861,6 +861,10 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); + /* Send copy to sniffer */ + nfc_send_to_raw_sock(ndev->nfc_dev, skb, + RAW_PAYLOAD_NCI, NFC_DIRECTION_TX); + return ndev->ops->send(ndev, skb); } @@ -935,6 +939,11 @@ static void nci_rx_work(struct work_struct *work) struct sk_buff *skb; while ((skb = skb_dequeue(&ndev->rx_q))) { + + /* Send copy to sniffer */ + nfc_send_to_raw_sock(ndev->nfc_dev, skb, + RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 1e905097456b..df91bb95b12a 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -166,7 +166,9 @@ static int nci_add_new_protocol(struct nci_dev *ndev, struct rf_tech_specific_params_nfcf_poll *nfcf_poll; __u32 protocol; - if (rf_protocol == NCI_RF_PROTOCOL_T2T) + if (rf_protocol == NCI_RF_PROTOCOL_T1T) + protocol = NFC_PROTO_JEWEL_MASK; + else if (rf_protocol == NCI_RF_PROTOCOL_T2T) protocol = NFC_PROTO_MIFARE_MASK; else if (rf_protocol == NCI_RF_PROTOCOL_ISO_DEP) if (rf_tech_and_mode == NCI_NFC_A_PASSIVE_POLL_MODE) @@ -366,7 +368,6 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, struct nci_rf_intf_activated_ntf *ntf, __u8 *data) { struct activation_params_poll_nfc_dep *poll; - int i; switch (ntf->activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: @@ -374,10 +375,8 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev, poll = &ntf->activation_params.poll_nfc_dep; poll->atr_res_len = min_t(__u8, *data++, 63); pr_debug("atr_res_len %d\n", poll->atr_res_len); - if (poll->atr_res_len > 0) { - for (i = 0; i < poll->atr_res_len; i++) - poll->atr_res[poll->atr_res_len-1-i] = data[i]; - } + if (poll->atr_res_len > 0) + memcpy(poll->atr_res, data, poll->atr_res_len); break; default: diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 9d6e74f7e6b3..88d60064890e 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -40,6 +40,12 @@ struct nfc_rawsock { struct work_struct tx_work; bool tx_work_scheduled; }; + +struct nfc_sock_list { + struct hlist_head head; + rwlock_t lock; +}; + #define nfc_rawsock(sk) ((struct nfc_rawsock *) sk) #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index c27a6e86cae4..11c3544ea546 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -27,6 +27,24 @@ #include "nfc.h" +static struct nfc_sock_list raw_sk_list = { + .lock = __RW_LOCK_UNLOCKED(raw_sk_list.lock) +}; + +static void nfc_sock_link(struct nfc_sock_list *l, struct sock *sk) +{ + write_lock(&l->lock); + sk_add_node(sk, &l->head); + write_unlock(&l->lock); +} + +static void nfc_sock_unlink(struct nfc_sock_list *l, struct sock *sk) +{ + write_lock(&l->lock); + sk_del_node_init(sk); + write_unlock(&l->lock); +} + static void rawsock_write_queue_purge(struct sock *sk) { pr_debug("sk=%p\n", sk); @@ -57,6 +75,9 @@ static int rawsock_release(struct socket *sock) if (!sk) return 0; + if (sock->type == SOCK_RAW) + nfc_sock_unlink(&raw_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -275,6 +296,26 @@ static const struct proto_ops rawsock_ops = { .mmap = sock_no_mmap, }; +static const struct proto_ops rawsock_raw_ops = { + .family = PF_NFC, + .owner = THIS_MODULE, + .release = rawsock_release, + .bind = sock_no_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = rawsock_recvmsg, + .mmap = sock_no_mmap, +}; + static void rawsock_destruct(struct sock *sk) { pr_debug("sk=%p\n", sk); @@ -300,10 +341,13 @@ static int rawsock_create(struct net *net, struct socket *sock, pr_debug("sock=%p\n", sock); - if (sock->type != SOCK_SEQPACKET) + if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW)) return -ESOCKTNOSUPPORT; - sock->ops = &rawsock_ops; + if (sock->type == SOCK_RAW) + sock->ops = &rawsock_raw_ops; + else + sock->ops = &rawsock_ops; sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); if (!sk) @@ -313,13 +357,53 @@ static int rawsock_create(struct net *net, struct socket *sock, sk->sk_protocol = nfc_proto->id; sk->sk_destruct = rawsock_destruct; sock->state = SS_UNCONNECTED; - - INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work); - nfc_rawsock(sk)->tx_work_scheduled = false; + if (sock->type == SOCK_RAW) + nfc_sock_link(&raw_sk_list, sk); + else { + INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work); + nfc_rawsock(sk)->tx_work_scheduled = false; + } return 0; } +void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb, + u8 payload_type, u8 direction) +{ + struct sk_buff *skb_copy = NULL, *nskb; + struct sock *sk; + u8 *data; + + read_lock(&raw_sk_list.lock); + + sk_for_each(sk, &raw_sk_list.head) { + if (!skb_copy) { + skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, + GFP_ATOMIC, true); + if (!skb_copy) + continue; + + data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); + + data[0] = dev ? dev->idx : 0xFF; + data[1] = direction & 0x01; + data[1] |= (payload_type << 1); + } + + nskb = skb_clone(skb_copy, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&raw_sk_list.lock); + + kfree_skb(skb_copy); +} +EXPORT_SYMBOL(nfc_send_to_raw_sock); + static struct proto rawsock_proto = { .name = "NFC_RAW", .owner = THIS_MODULE, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 2c77e7b1a913..fe5cda0deb39 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -38,7 +38,7 @@ #include "vport.h" static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - const struct nlattr *attr, int len, bool keep_skb); + const struct nlattr *attr, int len); static int make_writable(struct sk_buff *skb, int write_len) { @@ -134,8 +134,8 @@ static int set_eth_addr(struct sk_buff *skb, skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); - memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); @@ -434,11 +434,17 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, return ovs_dp_upcall(dp, skb, &upcall); } +static bool last_action(const struct nlattr *a, int rem) +{ + return a->nla_len == rem; +} + static int sample(struct datapath *dp, struct sk_buff *skb, const struct nlattr *attr) { const struct nlattr *acts_list = NULL; const struct nlattr *a; + struct sk_buff *sample_skb; int rem; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; @@ -455,8 +461,34 @@ static int sample(struct datapath *dp, struct sk_buff *skb, } } - return do_execute_actions(dp, skb, nla_data(acts_list), - nla_len(acts_list), true); + rem = nla_len(acts_list); + a = nla_data(acts_list); + + /* Actions list is either empty or only contains a single user-space + * action, the latter being a special case as it is the only known + * usage of the sample action. + * In these special cases don't clone the skb as there are no + * side-effects in the nested actions. + * Otherwise, clone in case the nested actions have side effects. + */ + if (likely(rem == 0 || (nla_type(a) == OVS_ACTION_ATTR_USERSPACE && + last_action(a, rem)))) { + sample_skb = skb; + skb_get(skb); + } else { + sample_skb = skb_clone(skb, GFP_ATOMIC); + if (!sample_skb) /* Skip sample action when out of memory. */ + return 0; + } + + /* Note that do_execute_actions() never consumes skb. + * In the case where skb has been cloned above it is the clone that + * is consumed. Otherwise the skb_get(skb) call prevents + * consumption by do_execute_actions(). Thus, it is safe to simply + * return the error code and let the caller (also + * do_execute_actions()) free skb on error. + */ + return do_execute_actions(dp, sample_skb, a, rem); } static int execute_set_action(struct sk_buff *skb, @@ -507,7 +539,7 @@ static int execute_set_action(struct sk_buff *skb, /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - const struct nlattr *attr, int len, bool keep_skb) + const struct nlattr *attr, int len) { /* Every output action needs a separate clone of 'skb', but the common * case is just a single output action, so that doing a clone and @@ -551,6 +583,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, a); + if (unlikely(err)) /* skb already freed. */ + return err; break; } @@ -560,12 +594,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, } } - if (prev_port != -1) { - if (keep_skb) - skb = skb_clone(skb, GFP_ATOMIC); - + if (prev_port != -1) do_output(dp, skb, prev_port); - } else if (!keep_skb) + else consume_skb(skb); return 0; @@ -577,6 +608,5 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); OVS_CB(skb)->tun_key = NULL; - return do_execute_actions(dp, skb, acts->actions, - acts->actions_len, false); + return do_execute_actions(dp, skb, acts->actions, acts->actions_len); } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a3276e3c4feb..7ad3f029baae 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -44,11 +44,11 @@ #include <linux/netfilter_ipv4.h> #include <linux/inetdevice.h> #include <linux/list.h> -#include <linux/lockdep.h> #include <linux/openvswitch.h> #include <linux/rculist.h> #include <linux/dmi.h> -#include <linux/workqueue.h> +#include <linux/genetlink.h> +#include <net/genetlink.h> #include <net/genetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -62,6 +62,31 @@ int ovs_net_id __read_mostly; +static struct genl_family dp_packet_genl_family; +static struct genl_family dp_flow_genl_family; +static struct genl_family dp_datapath_genl_family; + +static const struct genl_multicast_group ovs_dp_flow_multicast_group = { + .name = OVS_FLOW_MCGROUP, +}; + +static const struct genl_multicast_group ovs_dp_datapath_multicast_group = { + .name = OVS_DATAPATH_MCGROUP, +}; + +static const struct genl_multicast_group ovs_dp_vport_multicast_group = { + .name = OVS_VPORT_MCGROUP, +}; + +/* Check if need to build a reply message. + * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ +static bool ovs_must_notify(struct genl_info *info, + const struct genl_multicast_group *grp) +{ + return info->nlhdr->nlmsg_flags & NLM_F_ECHO || + netlink_has_listeners(genl_info_net(info)->genl_sock, 0); +} + static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { @@ -173,6 +198,7 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; } +/* Called with ovs_mutex or RCU read lock. */ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; @@ -240,7 +266,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; - upcall.portid = p->upcall_portid; + upcall.portid = ovs_vport_find_upcall_portid(p, skb); ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; @@ -250,7 +276,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) OVS_CB(skb)->flow = flow; OVS_CB(skb)->pkt_key = &key; - ovs_flow_stats_update(OVS_CB(skb)->flow, skb); + ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb); ovs_execute_actions(dp, skb); stats_counter = &stats->n_hit; @@ -262,16 +288,6 @@ out: u64_stats_update_end(&stats->syncp); } -static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_PACKET_FAMILY, - .version = OVS_PACKET_VERSION, - .maxattr = OVS_PACKET_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { @@ -448,7 +464,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); + err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); + BUG_ON(err); nla_nest_end(user_skb, nla); if (upcall_info->userdata) @@ -524,7 +541,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->protocol = htons(ETH_P_802_2); /* Build an sw_flow for sending this packet. */ - flow = ovs_flow_alloc(false); + flow = ovs_flow_alloc(); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; @@ -590,6 +607,18 @@ static const struct genl_ops dp_packet_genl_ops[] = { } }; +static struct genl_family dp_packet_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_PACKET_FAMILY, + .version = OVS_PACKET_VERSION, + .maxattr = OVS_PACKET_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_packet_genl_ops, + .n_ops = ARRAY_SIZE(dp_packet_genl_ops), +}; + static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, struct ovs_dp_megaflow_stats *mega_stats) { @@ -621,26 +650,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, } } -static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { - [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, -}; - -static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_FLOW_FAMILY, - .version = OVS_FLOW_VERSION, - .maxattr = OVS_FLOW_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_flow_multicast_group = { - .name = OVS_FLOW_MCGROUP -}; - static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -652,8 +661,8 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ } -/* Called with ovs_mutex. */ -static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, +/* Called with ovs_mutex or RCU read lock. */ +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -670,7 +679,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!ovs_header) return -EMSGSIZE; - ovs_header->dp_ifindex = get_dpifindex(dp); + ovs_header->dp_ifindex = dp_ifindex; /* Fill flow key. */ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); @@ -693,6 +702,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, nla_nest_end(skb, nla); ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); + if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; @@ -720,9 +730,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, const struct sw_flow_actions *sf_acts; sf_acts = rcu_dereference_ovsl(flow->sf_acts); - err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); + if (!err) nla_nest_end(skb, start); else { @@ -743,113 +753,128 @@ error: return err; } -static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, - struct genl_info *info) +/* May not be called with RCU read lock. */ +static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, + struct genl_info *info, + bool always) { - size_t len; + struct sk_buff *skb; - len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); + if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) + return NULL; - return genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); + + return skb; } -static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, - struct datapath *dp, - struct genl_info *info, - u8 cmd) +/* Called with ovs_mutex. */ +static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, + int dp_ifindex, + struct genl_info *info, u8 cmd, + bool always) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow, info); - if (!skb) - return ERR_PTR(-ENOMEM); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, + always); + if (IS_ERR_OR_NULL(skb)) + return skb; - retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, - info->snd_seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, + info->snd_portid, info->snd_seq, 0, + cmd); BUG_ON(retval < 0); return skb; } -static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) +static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow_key key, masked_key; - struct sw_flow *flow = NULL; + struct sw_flow *flow, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct sw_flow_actions *acts = NULL; + struct sw_flow_actions *acts; struct sw_flow_match match; - bool exact_5tuple; int error; - /* Extract key. */ + /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) goto error; + if (!a[OVS_FLOW_ATTR_ACTIONS]) + goto error; - ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, &exact_5tuple, + /* Most of the time we need to allocate a new flow, do it before + * locking. + */ + new_flow = ovs_flow_alloc(); + if (IS_ERR(new_flow)) { + error = PTR_ERR(new_flow); + goto error; + } + + /* Extract key. */ + ovs_match_init(&match, &new_flow->unmasked_key, &mask); + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) - goto error; + goto err_kfree_flow; + + ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); /* Validate actions. */ - if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); - error = PTR_ERR(acts); - if (IS_ERR(acts)) - goto error; + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) + goto err_kfree_flow; - ovs_flow_mask_key(&masked_key, &key, &mask); - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); - if (error) { - OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; - } - } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { - error = -EINVAL; - goto error; + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, + 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree_acts; + } + + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - error = -ENODEV; - if (!dp) + if (unlikely(!dp)) { + error = -ENODEV; goto err_unlock_ovs; - + } /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow) { - /* Bail out if we're not allowed to create a new flow. */ - error = -ENOENT; - if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) - goto err_unlock_ovs; - - /* Allocate flow. */ - flow = ovs_flow_alloc(!exact_5tuple); - if (IS_ERR(flow)) { - error = PTR_ERR(flow); - goto err_unlock_ovs; - } - - flow->key = masked_key; - flow->unmasked_key = key; - rcu_assign_pointer(flow->sf_acts, acts); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (likely(!flow)) { + rcu_assign_pointer(new_flow->sf_acts, acts); /* Put flow in bucket. */ - error = ovs_flow_tbl_insert(&dp->table, flow, &mask); - if (error) { + error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); + if (unlikely(error)) { acts = NULL; - goto err_flow_free; + goto err_unlock_ovs; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(new_flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); } else { - /* We found a matching flow. */ struct sw_flow_actions *old_acts; /* Bail out if we're not allowed to modify an existing flow. @@ -858,40 +883,153 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) * request. We also accept NLM_F_EXCL in case that bug ever * gets fixed. */ - error = -EEXIST; - if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && - info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE + | NLM_F_EXCL))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* The unmasked key has to be the same for flow updates. */ - if (!ovs_flow_cmp_unmasked_key(flow, &match)) - goto err_unlock_ovs; - + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (!flow) { + error = -ENOENT; + goto err_unlock_ovs; + } + } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); + + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); + ovs_nla_free_flow_actions(old_acts); + ovs_flow_free(new_flow, false); + } + + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); + return 0; + +err_unlock_ovs: + ovs_unlock(); + kfree_skb(reply); +err_kfree_acts: + kfree(acts); +err_kfree_flow: + ovs_flow_free(new_flow, false); +error: + return error; +} + +static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sw_flow_key key, masked_key; + struct sw_flow *flow; + struct sw_flow_mask mask; + struct sk_buff *reply = NULL; + struct datapath *dp; + struct sw_flow_actions *old_acts = NULL, *acts = NULL; + struct sw_flow_match match; + int error; + + /* Extract key. */ + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_KEY]) + goto error; + + ovs_match_init(&match, &key, &mask); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + if (error) + goto error; + + /* Validate actions. */ + if (a[OVS_FLOW_ATTR_ACTIONS]) { + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) + goto error; + + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree_acts; + } + } + + /* Can allocate before locking if have acts. */ + if (acts) { + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; + } + } + + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (unlikely(!dp)) { + error = -ENODEV; + goto err_unlock_ovs; + } + /* Check that the flow exists. */ + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (unlikely(!flow)) { + error = -ENOENT; + goto err_unlock_ovs; + } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + /* Update actions, if present. */ + if (likely(acts)) { + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); - /* Clear stats. */ - if (a[OVS_FLOW_ATTR_CLEAR]) - ovs_flow_stats_clear(flow); + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + } else { + /* Could not alloc without acts before locking. */ + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + if (unlikely(IS_ERR(reply))) { + error = PTR_ERR(reply); + goto err_unlock_ovs; + } } + + /* Clear stats. */ + if (a[OVS_FLOW_ATTR_CLEAR]) + ovs_flow_stats_clear(flow); ovs_unlock(); - if (!IS_ERR(reply)) + if (reply) ovs_notify(&dp_flow_genl_family, reply, info); - else - genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, - 0, PTR_ERR(reply)); + if (old_acts) + ovs_nla_free_flow_actions(old_acts); + return 0; -err_flow_free: - ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); -err_kfree: + kfree_skb(reply); +err_kfree_acts: kfree(acts); error: return error; @@ -914,7 +1052,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -925,13 +1063,14 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (!flow) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, + OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -955,45 +1094,53 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sw_flow_match match; int err; + if (likely(a[OVS_FLOW_ATTR_KEY])) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + if (unlikely(err)) + return err; + } + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) { + if (unlikely(!dp)) { err = -ENODEV; goto unlock; } - if (!a[OVS_FLOW_ATTR_KEY]) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); - if (err) - goto unlock; - - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (unlikely(!flow)) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow, info); - if (!reply) { - err = -ENOMEM; - goto unlock; - } - ovs_flow_tbl_remove(&dp->table, flow); + ovs_unlock(); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, - info->snd_seq, 0, OVS_FLOW_CMD_DEL); - BUG_ON(err < 0); + reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, + info, false); + if (likely(reply)) { + if (likely(!IS_ERR(reply))) { + rcu_read_lock(); /*To keep RCU checker happy. */ + err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_DEL); + rcu_read_unlock(); + BUG_ON(err < 0); + + ovs_notify(&dp_flow_genl_family, reply, info); + } else { + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); + } + } ovs_flow_free(flow, true); - ovs_unlock(); - - ovs_notify(&dp_flow_genl_family, reply, info); return 0; unlock: ovs_unlock(); @@ -1024,7 +1171,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!flow) break; - if (ovs_flow_cmd_fill_info(flow, dp, skb, + if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) @@ -1037,11 +1184,17 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { + [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, +}; + static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set + .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ @@ -1057,28 +1210,22 @@ static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set, + .doit = ovs_flow_cmd_set, }, }; -static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { - [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, -}; - -static struct genl_family dp_datapath_genl_family = { +static struct genl_family dp_flow_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_DATAPATH_FAMILY, - .version = OVS_DATAPATH_VERSION, - .maxattr = OVS_DP_ATTR_MAX, + .name = OVS_FLOW_FAMILY, + .version = OVS_FLOW_VERSION, + .maxattr = OVS_FLOW_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_datapath_multicast_group = { - .name = OVS_DATAPATH_MCGROUP + .ops = dp_flow_genl_ops, + .n_ops = ARRAY_SIZE(dp_flow_genl_ops), + .mcgrps = &ovs_dp_flow_multicast_group, + .n_mcgrps = 1, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1093,6 +1240,7 @@ static size_t ovs_dp_cmd_msg_size(void) return msgsize; } +/* Called with ovs_mutex or RCU read lock. */ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1108,9 +1256,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, ovs_header->dp_ifindex = get_dpifindex(dp); - rcu_read_lock(); err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); - rcu_read_unlock(); if (err) goto nla_put_failure; @@ -1135,25 +1281,12 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, - struct genl_info *info, u8 cmd) +static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) { - struct sk_buff *skb; - int retval; - - skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); - if (!skb) - return ERR_PTR(-ENOMEM); - - retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } - return skb; + return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); } -/* Called with ovs_mutex. */ +/* Called with rcu_read_lock or ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) @@ -1165,10 +1298,8 @@ static struct datapath *lookup_datapath(struct net *net, else { struct vport *vport; - rcu_read_lock(); vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; - rcu_read_unlock(); } return dp ? dp : ERR_PTR(-ENODEV); } @@ -1205,12 +1336,14 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_unlock_ovs; + goto err_free_reply; ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); @@ -1241,10 +1374,13 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; - parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; ovs_dp_change(dp, a); + /* So far only local changes have been made, now need the lock. */ + ovs_lock(); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); @@ -1263,10 +1399,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto err_destroy_local_port; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail_rcu(&dp->list_node, &ovs_net->dps); @@ -1276,9 +1411,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_local_port: - ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); err_destroy_ports_array: + ovs_unlock(); kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); @@ -1287,8 +1421,8 @@ err_destroy_table: err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); -err_unlock_ovs: - ovs_unlock(); +err_free_reply: + kfree_skb(reply); err: return err; } @@ -1326,16 +1460,19 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto unlock; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_DEL); + BUG_ON(err < 0); __dp_destroy(dp); ovs_unlock(); @@ -1343,8 +1480,10 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1354,29 +1493,30 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; ovs_dp_change(dp, info->attrs); - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, - 0, err); - err = 0; - goto unlock; - } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1386,24 +1526,26 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + + rcu_read_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); - goto unlock; - } - - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - goto unlock; + goto err_unlock_free; } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); + rcu_read_unlock(); - ovs_unlock(); return genlmsg_reply(reply, info); -unlock: - ovs_unlock(); +err_unlock_free: + rcu_read_unlock(); + kfree_skb(reply); return err; } @@ -1430,6 +1572,12 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { + [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, +}; + static const struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ @@ -1454,27 +1602,18 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }, }; -static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { - [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, - [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, -}; - -struct genl_family dp_vport_genl_family = { +static struct genl_family dp_datapath_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_VPORT_FAMILY, - .version = OVS_VPORT_VERSION, - .maxattr = OVS_VPORT_ATTR_MAX, + .name = OVS_DATAPATH_FAMILY, + .version = OVS_DATAPATH_VERSION, + .maxattr = OVS_DP_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_vport_multicast_group = { - .name = OVS_VPORT_MCGROUP + .ops = dp_datapath_genl_ops, + .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), + .mcgrps = &ovs_dp_datapath_multicast_group, + .n_mcgrps = 1, }; /* Called with ovs_mutex or RCU read lock. */ @@ -1494,8 +1633,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || - nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) + nla_put_string(skb, OVS_VPORT_ATTR_NAME, + vport->ops->get_name(vport))) goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); @@ -1503,6 +1642,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, &vport_stats)) goto nla_put_failure; + if (ovs_vport_get_upcall_portids(vport, skb)) + goto nla_put_failure; + err = ovs_vport_get_options(vport, skb); if (err == -EMSGSIZE) goto error; @@ -1516,7 +1658,12 @@ error: return err; } -/* Called with ovs_mutex or RCU read lock. */ +static struct sk_buff *ovs_vport_cmd_alloc_info(void) +{ + return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +} + +/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { @@ -1578,33 +1725,35 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) u32 port_no; int err; - err = -EINVAL; if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) - goto exit; + return -EINVAL; + + port_no = a[OVS_VPORT_ATTR_PORT_NO] + ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; + if (port_no >= DP_MAX_PORTS) + return -EFBIG; + + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) - goto exit_unlock; - - if (a[OVS_VPORT_ATTR_PORT_NO]) { - port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); - - err = -EFBIG; - if (port_no >= DP_MAX_PORTS) - goto exit_unlock; + goto exit_unlock_free; + if (port_no) { vport = ovs_vport_ovsl(dp, port_no); err = -EBUSY; if (vport) - goto exit_unlock; + goto exit_unlock_free; } else { for (port_no = 1; ; port_no++) { if (port_no >= DP_MAX_PORTS) { err = -EFBIG; - goto exit_unlock; + goto exit_unlock_free; } vport = ovs_vport_ovsl(dp, port_no); if (!vport) @@ -1617,27 +1766,24 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; - parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; - err = 0; - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - ovs_dp_detach_port(vport); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); -exit: + kfree_skb(reply); return err; } @@ -1648,32 +1794,36 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (a[OVS_VPORT_ATTR_TYPE] && nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; - goto exit_unlock; - } - - reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!reply) { - err = -ENOMEM; - goto exit_unlock; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_free; + goto exit_unlock_free; } - if (a[OVS_VPORT_ATTR_UPCALL_PID]) - vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + + if (a[OVS_VPORT_ATTR_UPCALL_PID]) { + struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID]; + + err = ovs_vport_set_upcall_portids(vport, ids); + if (err) + goto exit_unlock_free; + } err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW); @@ -1683,10 +1833,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_vport_genl_family, reply, info); return 0; -exit_free: - kfree_skb(reply); -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1697,30 +1846,33 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (vport->port_no == OVSP_LOCAL) { err = -EINVAL; - goto exit_unlock; + goto exit_unlock_free; } - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - - err = 0; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_DEL); + BUG_ON(err < 0); ovs_dp_detach_port(vport); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1732,24 +1884,25 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + rcu_read_lock(); vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; - - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - + goto exit_unlock_free; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); rcu_read_unlock(); return genlmsg_reply(reply, info); -exit_unlock: +exit_unlock_free: rcu_read_unlock(); + kfree_skb(reply); return err; } @@ -1792,6 +1945,15 @@ out: return skb->len; } +static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { + [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, + [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, +}; + static const struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ @@ -1816,26 +1978,25 @@ static const struct genl_ops dp_vport_genl_ops[] = { }, }; -struct genl_family_and_ops { - struct genl_family *family; - const struct genl_ops *ops; - int n_ops; - const struct genl_multicast_group *group; +struct genl_family dp_vport_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_VPORT_FAMILY, + .version = OVS_VPORT_VERSION, + .maxattr = OVS_VPORT_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_vport_genl_ops, + .n_ops = ARRAY_SIZE(dp_vport_genl_ops), + .mcgrps = &ovs_dp_vport_multicast_group, + .n_mcgrps = 1, }; -static const struct genl_family_and_ops dp_genl_families[] = { - { &dp_datapath_genl_family, - dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), - &ovs_dp_datapath_multicast_group }, - { &dp_vport_genl_family, - dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), - &ovs_dp_vport_multicast_group }, - { &dp_flow_genl_family, - dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), - &ovs_dp_flow_multicast_group }, - { &dp_packet_genl_family, - dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), - NULL }, +static struct genl_family * const dp_genl_families[] = { + &dp_datapath_genl_family, + &dp_vport_genl_family, + &dp_flow_genl_family, + &dp_packet_genl_family, }; static void dp_unregister_genl(int n_families) @@ -1843,33 +2004,25 @@ static void dp_unregister_genl(int n_families) int i; for (i = 0; i < n_families; i++) - genl_unregister_family(dp_genl_families[i].family); + genl_unregister_family(dp_genl_families[i]); } static int dp_register_genl(void) { - int n_registered; int err; int i; - n_registered = 0; for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { - const struct genl_family_and_ops *f = &dp_genl_families[i]; - f->family->ops = f->ops; - f->family->n_ops = f->n_ops; - f->family->mcgrps = f->group; - f->family->n_mcgrps = f->group ? 1 : 0; - err = genl_register_family(f->family); + err = genl_register_family(dp_genl_families[i]); if (err) goto error; - n_registered++; } return 0; error: - dp_unregister_genl(n_registered); + dp_unregister_genl(i); return err; } @@ -1910,10 +2063,14 @@ static int __init dp_init(void) pr_info("Open vSwitch switching datapath\n"); - err = ovs_flow_init(); + err = ovs_internal_dev_rtnl_link_register(); if (err) goto error; + err = ovs_flow_init(); + if (err) + goto error_unreg_rtnl_link; + err = ovs_vport_init(); if (err) goto error_flow_exit; @@ -1940,6 +2097,8 @@ error_vport_exit: ovs_vport_exit(); error_flow_exit: ovs_flow_exit(); +error_unreg_rtnl_link: + ovs_internal_dev_rtnl_link_unregister(); error: return err; } @@ -1952,6 +2111,7 @@ static void dp_cleanup(void) rcu_barrier(); ovs_vport_exit(); ovs_flow_exit(); + ovs_internal_dev_rtnl_link_unregister(); } module_init(dp_init); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 05317380fc03..701b5738c38a 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -144,7 +144,7 @@ int lockdep_ovsl_is_held(void); #define lockdep_ovsl_is_held() 1 #endif -#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held())) +#define ASSERT_OVSL() WARN_ON(!lockdep_ovsl_is_held()) #define ovsl_dereference(p) \ rcu_dereference_protected(p, lockdep_ovsl_is_held()) #define rcu_dereference_ovsl(p) \ @@ -194,7 +194,9 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); void ovs_dp_notify_wq(struct work_struct *work); -#define OVS_NLERR(fmt, ...) \ - pr_info_once("netlink: " fmt, ##__VA_ARGS__) - +#define OVS_NLERR(fmt, ...) \ +do { \ + if (net_ratelimit()) \ + pr_info("netlink: " fmt, ##__VA_ARGS__); \ +} while (0) #endif /* datapath.h */ diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2998989e76db..d07ab538fc9d 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -61,91 +61,113 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) -void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) +void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, + struct sk_buff *skb) { struct flow_stats *stats; - __be16 tcp_flags = 0; - - if (!flow->stats.is_percpu) - stats = flow->stats.stat; - else - stats = this_cpu_ptr(flow->stats.cpu_stats); - - if ((flow->key.eth.type == htons(ETH_P_IP) || - flow->key.eth.type == htons(ETH_P_IPV6)) && - flow->key.ip.frag != OVS_FRAG_TYPE_LATER && - flow->key.ip.proto == IPPROTO_TCP && - likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { - tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); + int node = numa_node_id(); + + stats = rcu_dereference(flow->stats[node]); + + /* Check if already have node-specific stats. */ + if (likely(stats)) { + spin_lock(&stats->lock); + /* Mark if we write on the pre-allocated stats. */ + if (node == 0 && unlikely(flow->stats_last_writer != node)) + flow->stats_last_writer = node; + } else { + stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ + spin_lock(&stats->lock); + + /* If the current NUMA-node is the only writer on the + * pre-allocated stats keep using them. + */ + if (unlikely(flow->stats_last_writer != node)) { + /* A previous locker may have already allocated the + * stats, so we need to check again. If node-specific + * stats were already allocated, we update the pre- + * allocated stats as we have already locked them. + */ + if (likely(flow->stats_last_writer != NUMA_NO_NODE) + && likely(!rcu_dereference(flow->stats[node]))) { + /* Try to allocate node-specific stats. */ + struct flow_stats *new_stats; + + new_stats = + kmem_cache_alloc_node(flow_stats_cache, + GFP_THISNODE | + __GFP_NOMEMALLOC, + node); + if (likely(new_stats)) { + new_stats->used = jiffies; + new_stats->packet_count = 1; + new_stats->byte_count = skb->len; + new_stats->tcp_flags = tcp_flags; + spin_lock_init(&new_stats->lock); + + rcu_assign_pointer(flow->stats[node], + new_stats); + goto unlock; + } + } + flow->stats_last_writer = node; + } } - spin_lock(&stats->lock); stats->used = jiffies; stats->packet_count++; stats->byte_count += skb->len; stats->tcp_flags |= tcp_flags; +unlock: spin_unlock(&stats->lock); } -static void stats_read(struct flow_stats *stats, - struct ovs_flow_stats *ovs_stats, - unsigned long *used, __be16 *tcp_flags) -{ - spin_lock(&stats->lock); - if (!*used || time_after(stats->used, *used)) - *used = stats->used; - *tcp_flags |= stats->tcp_flags; - ovs_stats->n_packets += stats->packet_count; - ovs_stats->n_bytes += stats->byte_count; - spin_unlock(&stats->lock); -} - -void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, +/* Must be called with rcu_read_lock or ovs_mutex. */ +void ovs_flow_stats_get(const struct sw_flow *flow, + struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu; + int node; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - local_bh_disable(); - if (!flow->stats.is_percpu) { - stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); - } else { - for_each_possible_cpu(cpu) { - struct flow_stats *stats; + for_each_node(node) { + struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - stats_read(stats, ovs_stats, used, tcp_flags); + if (stats) { + /* Local CPU may write on non-local stats, so we must + * block bottom-halves here. + */ + spin_lock_bh(&stats->lock); + if (!*used || time_after(stats->used, *used)) + *used = stats->used; + *tcp_flags |= stats->tcp_flags; + ovs_stats->n_packets += stats->packet_count; + ovs_stats->n_bytes += stats->byte_count; + spin_unlock_bh(&stats->lock); } } - local_bh_enable(); -} - -static void stats_reset(struct flow_stats *stats) -{ - spin_lock(&stats->lock); - stats->used = 0; - stats->packet_count = 0; - stats->byte_count = 0; - stats->tcp_flags = 0; - spin_unlock(&stats->lock); } +/* Called with ovs_mutex. */ void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu; - - local_bh_disable(); - if (!flow->stats.is_percpu) { - stats_reset(flow->stats.stat); - } else { - for_each_possible_cpu(cpu) { - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); + int node; + + for_each_node(node) { + struct flow_stats *stats = ovsl_dereference(flow->stats[node]); + + if (stats) { + spin_lock_bh(&stats->lock); + stats->used = 0; + stats->packet_count = 0; + stats->byte_count = 0; + stats->tcp_flags = 0; + spin_unlock_bh(&stats->lock); } } - local_bh_enable(); } static int check_header(struct sk_buff *skb, int len) @@ -332,8 +354,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, /* The ICMPv6 type and code fields use the 16-bit transport port * fields, so we need to store them in 16-bit network byte order. */ - key->ipv6.tp.src = htons(icmp->icmp6_type); - key->ipv6.tp.dst = htons(icmp->icmp6_code); + key->tp.src = htons(icmp->icmp6_type); + key->tp.dst = htons(icmp->icmp6_code); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -372,14 +394,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) goto invalid; - memcpy(key->ipv6.nd.sll, - &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + ether_addr_copy(key->ipv6.nd.sll, + &nd->opt[offset+sizeof(*nd_opt)]); } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) goto invalid; - memcpy(key->ipv6.nd.tll, - &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + ether_addr_copy(key->ipv6.nd.tll, + &nd->opt[offset+sizeof(*nd_opt)]); } icmp_len -= opt_len; @@ -439,8 +461,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) * header in the linear data area. */ eth = eth_hdr(skb); - memcpy(key->eth.src, eth->h_source, ETH_ALEN); - memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); __skb_pull(skb, 2 * ETH_ALEN); /* We are going to push all headers that we pull, so no need to @@ -495,21 +517,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) if (key->ip.proto == IPPROTO_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv4.tp.src = tcp->source; - key->ipv4.tp.dst = tcp->dest; - key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp); + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == IPPROTO_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv4.tp.src = udp->source; - key->ipv4.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; } } else if (key->ip.proto == IPPROTO_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); - key->ipv4.tp.src = sctp->source; - key->ipv4.tp.dst = sctp->dest; + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; } } else if (key->ip.proto == IPPROTO_ICMP) { if (icmphdr_ok(skb)) { @@ -517,8 +539,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) /* The ICMP type and code fields use the 16-bit * transport port fields, so we need to store * them in 16-bit network byte order. */ - key->ipv4.tp.src = htons(icmp->type); - key->ipv4.tp.dst = htons(icmp->code); + key->tp.src = htons(icmp->type); + key->tp.dst = htons(icmp->code); } } @@ -538,8 +560,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) key->ip.proto = ntohs(arp->ar_op); memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); - memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); - memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); + ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha); + ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha); } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ @@ -564,21 +586,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv6.tp.src = tcp->source; - key->ipv6.tp.dst = tcp->dest; - key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp); + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == NEXTHDR_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv6.tp.src = udp->source; - key->ipv6.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; } } else if (key->ip.proto == NEXTHDR_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); - key->ipv6.tp.src = sctp->source; - key->ipv6.tp.dst = sctp->dest; + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; } } else if (key->ip.proto == NEXTHDR_ICMP) { if (icmp6hdr_ok(skb)) { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 2d770e28a3a3..5e5aaed3a85b 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -47,7 +47,7 @@ struct ovs_key_ipv4_tunnel { __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; -}; +} __packed __aligned(4); /* Minimize padding. */ static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, const struct iphdr *iph, __be64 tun_id, @@ -71,7 +71,7 @@ struct sw_flow_key { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ - } phy; + } __packed phy; /* Safe when right after 'tun_key'. */ struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ @@ -84,23 +84,21 @@ struct sw_flow_key { u8 ttl; /* IP TTL/hop limit. */ u8 frag; /* One of OVS_FRAG_TYPE_*. */ } ip; + struct { + __be16 src; /* TCP/UDP/SCTP source port. */ + __be16 dst; /* TCP/UDP/SCTP destination port. */ + __be16 flags; /* TCP flags. */ + } tp; union { struct { struct { __be32 src; /* IP source address. */ __be32 dst; /* IP destination address. */ } addr; - union { - struct { - __be16 src; /* TCP/UDP/SCTP source port. */ - __be16 dst; /* TCP/UDP/SCTP destination port. */ - __be16 flags; /* TCP flags. */ - } tp; - struct { - u8 sha[ETH_ALEN]; /* ARP source hardware address. */ - u8 tha[ETH_ALEN]; /* ARP target hardware address. */ - } arp; - }; + struct { + u8 sha[ETH_ALEN]; /* ARP source hardware address. */ + u8 tha[ETH_ALEN]; /* ARP target hardware address. */ + } arp; } ipv4; struct { struct { @@ -109,11 +107,6 @@ struct sw_flow_key { } addr; __be32 label; /* IPv6 flow label. */ struct { - __be16 src; /* TCP/UDP/SCTP source port. */ - __be16 dst; /* TCP/UDP/SCTP destination port. */ - __be16 flags; /* TCP flags. */ - } tp; - struct { struct in6_addr target; /* ND target address. */ u8 sll[ETH_ALEN]; /* ND source link layer address. */ u8 tll[ETH_ALEN]; /* ND target link layer address. */ @@ -155,24 +148,22 @@ struct flow_stats { __be16 tcp_flags; /* Union of seen TCP flags. */ }; -struct sw_flow_stats { - bool is_percpu; - union { - struct flow_stats *stat; - struct flow_stats __percpu *cpu_stats; - }; -}; - struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; u32 hash; - + int stats_last_writer; /* NUMA-node id of the last writer on + * 'stats[0]'. + */ struct sw_flow_key key; struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct sw_flow_stats stats; + struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one + * is allocated at flow creation time, + * the rest are allocated on demand + * while holding the 'stats[0].lock'. + */ }; struct arp_eth_header { @@ -189,10 +180,11 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); -void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, +void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, + struct sk_buff *); +void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, unsigned long *used, __be16 *tcp_flags); -void ovs_flow_stats_clear(struct sw_flow *flow); +void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4d000acaed0d..d757848da89c 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -16,6 +16,8 @@ * 02110-1301, USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "flow.h" #include "datapath.h" #include <linux/uaccess.h> @@ -202,11 +204,11 @@ static bool match_validate(const struct sw_flow_match *match, if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; - if (match->key->ipv6.tp.src == + if (match->key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || - match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { key_expected |= 1 << OVS_KEY_ATTR_ND; - if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) + if (match->mask && (match->mask->key.tp.src == htons(0xffff))) mask_allowed |= 1 << OVS_KEY_ATTR_ND; } } @@ -216,14 +218,14 @@ static bool match_validate(const struct sw_flow_match *match, if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - key_attrs, key_expected); + (unsigned long long)key_attrs, (unsigned long long)key_expected); return false; } if ((mask_attrs & mask_allowed) != mask_attrs) { /* Mask attributes check failed. */ OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - mask_attrs, mask_allowed); + (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); return false; } @@ -266,20 +268,6 @@ static bool is_all_zero(const u8 *fp, size_t size) return true; } -static bool is_all_set(const u8 *fp, size_t size) -{ - int i; - - if (!fp) - return false; - - for (i = 0; i < size; i++) - if (fp[i] != 0xff) - return false; - - return true; -} - static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool nz) @@ -501,9 +489,8 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return 0; } -static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, - u64 attrs, const struct nlattr **a, - bool is_mask) +static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, + const struct nlattr **a, bool is_mask) { int err; u64 orig_attrs = attrs; @@ -560,11 +547,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); } - if (is_mask && exact_5tuple) { - if (match->mask->key.eth.type != htons(0xffff)) - *exact_5tuple = false; - } - if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; @@ -587,13 +569,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple SW_FLOW_KEY_PUT(match, ipv4.addr.dst, ipv4_key->ipv4_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV4); - - if (is_mask && exact_5tuple && *exact_5tuple) { - if (ipv4_key->ipv4_proto != 0xff || - ipv4_key->ipv4_src != htonl(0xffffffff) || - ipv4_key->ipv4_dst != htonl(0xffffffff)) - *exact_5tuple = false; - } } if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { @@ -625,13 +600,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV6); - - if (is_mask && exact_5tuple && *exact_5tuple) { - if (ipv6_key->ipv6_proto != 0xff || - !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || - !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) - *exact_5tuple = false; - } } if (attrs & (1 << OVS_KEY_ATTR_ARP)) { @@ -662,32 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple const struct ovs_key_tcp *tcp_key; tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - tcp_key->tcp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - tcp_key->tcp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_TCP); - - if (is_mask && exact_5tuple && *exact_5tuple && - (tcp_key->tcp_src != htons(0xffff) || - tcp_key->tcp_dst != htons(0xffff))) - *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.flags, + SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.flags, + SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); } @@ -698,40 +652,17 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple const struct ovs_key_udp *udp_key; udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - udp_key->udp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - udp_key->udp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_UDP); - - if (is_mask && exact_5tuple && *exact_5tuple && - (udp_key->udp_src != htons(0xffff) || - udp_key->udp_dst != htons(0xffff))) - *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { const struct ovs_key_sctp *sctp_key; sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - sctp_key->sctp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - sctp_key->sctp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_SCTP); } @@ -739,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple const struct ovs_key_icmp *icmp_key; icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); - SW_FLOW_KEY_PUT(match, ipv4.tp.src, + SW_FLOW_KEY_PUT(match, tp.src, htons(icmp_key->icmp_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + SW_FLOW_KEY_PUT(match, tp.dst, htons(icmp_key->icmp_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMP); } @@ -750,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple const struct ovs_key_icmpv6 *icmpv6_key; icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); - SW_FLOW_KEY_PUT(match, ipv6.tp.src, + SW_FLOW_KEY_PUT(match, tp.src, htons(icmpv6_key->icmpv6_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + SW_FLOW_KEY_PUT(match, tp.dst, htons(icmpv6_key->icmpv6_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); } @@ -800,7 +731,6 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask, * attribute specifies the mask field of the wildcarded flow. */ int ovs_nla_get_match(struct sw_flow_match *match, - bool *exact_5tuple, const struct nlattr *key, const struct nlattr *mask) { @@ -848,13 +778,10 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, key_attrs, a, false); if (err) return err; - if (exact_5tuple) - *exact_5tuple = true; - if (mask) { err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) @@ -892,7 +819,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); if (err) return err; } else { @@ -982,8 +909,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; eth_key = nla_data(nla); - memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); - memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); + ether_addr_copy(eth_key->eth_src, output->eth.src); + ether_addr_copy(eth_key->eth_dst, output->eth.dst); if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { __be16 eth_type; @@ -1055,8 +982,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, arp_key->arp_sip = output->ipv4.addr.src; arp_key->arp_tip = output->ipv4.addr.dst; arp_key->arp_op = htons(output->ip.proto); - memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); - memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); + ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); + ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -1070,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; tcp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - tcp_key->tcp_src = output->ipv4.tp.src; - tcp_key->tcp_dst = output->ipv4.tp.dst; - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, - output->ipv4.tp.flags)) - goto nla_put_failure; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - tcp_key->tcp_src = output->ipv6.tp.src; - tcp_key->tcp_dst = output->ipv6.tp.dst; - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, - output->ipv6.tp.flags)) - goto nla_put_failure; - } + tcp_key->tcp_src = output->tp.src; + tcp_key->tcp_dst = output->tp.dst; + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, + output->tp.flags)) + goto nla_put_failure; } else if (swkey->ip.proto == IPPROTO_UDP) { struct ovs_key_udp *udp_key; @@ -1090,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; udp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - udp_key->udp_src = output->ipv4.tp.src; - udp_key->udp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - udp_key->udp_src = output->ipv6.tp.src; - udp_key->udp_dst = output->ipv6.tp.dst; - } + udp_key->udp_src = output->tp.src; + udp_key->udp_dst = output->tp.dst; } else if (swkey->ip.proto == IPPROTO_SCTP) { struct ovs_key_sctp *sctp_key; @@ -1104,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; sctp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - sctp_key->sctp_src = swkey->ipv4.tp.src; - sctp_key->sctp_dst = swkey->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - sctp_key->sctp_src = swkey->ipv6.tp.src; - sctp_key->sctp_dst = swkey->ipv6.tp.dst; - } + sctp_key->sctp_src = output->tp.src; + sctp_key->sctp_dst = output->tp.dst; } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto == IPPROTO_ICMP) { struct ovs_key_icmp *icmp_key; @@ -1119,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; icmp_key = nla_data(nla); - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); + icmp_key->icmp_type = ntohs(output->tp.src); + icmp_key->icmp_code = ntohs(output->tp.dst); } else if (swkey->eth.type == htons(ETH_P_IPV6) && swkey->ip.proto == IPPROTO_ICMPV6) { struct ovs_key_icmpv6 *icmpv6_key; @@ -1130,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; icmpv6_key = nla_data(nla); - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); + icmpv6_key->icmpv6_type = ntohs(output->tp.src); + icmpv6_key->icmpv6_code = ntohs(output->tp.dst); if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { @@ -1143,8 +1052,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, nd_key = nla_data(nla); memcpy(nd_key->nd_target, &output->ipv6.nd.target, sizeof(nd_key->nd_target)); - memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); - memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); + ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); + ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); } } } @@ -1309,13 +1218,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, static int validate_tp_port(const struct sw_flow_key *flow_key) { - if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) - return 0; - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) - return 0; - } + if ((flow_key->eth.type == htons(ETH_P_IP) || + flow_key->eth.type == htons(ETH_P_IPV6)) && + (flow_key->tp.src || flow_key->tp.dst)) + return 0; return -EINVAL; } diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index b31fbe28bc7a..440151045d39 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,7 +45,6 @@ int ovs_nla_put_flow(const struct sw_flow_key *, int ovs_nla_get_flow_metadata(struct sw_flow *flow, const struct nlattr *attr); int ovs_nla_get_match(struct sw_flow_match *match, - bool *exact_5tuple, const struct nlattr *, const struct nlattr *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 3c268b3d71c3..cf2d853646f0 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -48,6 +48,7 @@ #define REHASH_INTERVAL (10 * 60 * HZ) static struct kmem_cache *flow_cache; +struct kmem_cache *flow_stats_cache __read_mostly; static u16 range_n_bytes(const struct sw_flow_key_range *range) { @@ -57,8 +58,10 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask) { - const long *m = (long *)((u8 *)&mask->key + mask->range.start); - const long *s = (long *)((u8 *)src + mask->range.start); + const long *m = (const long *)((const u8 *)&mask->key + + mask->range.start); + const long *s = (const long *)((const u8 *)src + + mask->range.start); long *d = (long *)((u8 *)dst + mask->range.start); int i; @@ -70,10 +73,11 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, *d++ = *s++ & *m++; } -struct sw_flow *ovs_flow_alloc(bool percpu_stats) +struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; - int cpu; + struct flow_stats *stats; + int node; flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); if (!flow) @@ -81,27 +85,22 @@ struct sw_flow *ovs_flow_alloc(bool percpu_stats) flow->sf_acts = NULL; flow->mask = NULL; + flow->stats_last_writer = NUMA_NO_NODE; - flow->stats.is_percpu = percpu_stats; + /* Initialize the default stat node. */ + stats = kmem_cache_alloc_node(flow_stats_cache, + GFP_KERNEL | __GFP_ZERO, 0); + if (!stats) + goto err; - if (!percpu_stats) { - flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL); - if (!flow->stats.stat) - goto err; + spin_lock_init(&stats->lock); - spin_lock_init(&flow->stats.stat->lock); - } else { - flow->stats.cpu_stats = alloc_percpu(struct flow_stats); - if (!flow->stats.cpu_stats) - goto err; + RCU_INIT_POINTER(flow->stats[0], stats); - for_each_possible_cpu(cpu) { - struct flow_stats *cpu_stats; + for_each_node(node) + if (node != 0) + RCU_INIT_POINTER(flow->stats[node], NULL); - cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - spin_lock_init(&cpu_stats->lock); - } - } return flow; err: kmem_cache_free(flow_cache, flow); @@ -138,11 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { - kfree((struct sf_flow_acts __force *)flow->sf_acts); - if (flow->stats.is_percpu) - free_percpu(flow->stats.cpu_stats); - else - kfree(flow->stats.stat); + int node; + + kfree((struct sw_flow_actions __force *)flow->sf_acts); + for_each_node(node) + if (flow->stats[node]) + kmem_cache_free(flow_stats_cache, + (struct flow_stats __force *)flow->stats[node]); kmem_cache_free(flow_cache, flow); } @@ -158,25 +159,6 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred) if (!flow) return; - if (flow->mask) { - struct sw_flow_mask *mask = flow->mask; - - /* ovs-lock is required to protect mask-refcount and - * mask list. - */ - ASSERT_OVSL(); - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - kfree_rcu(mask, rcu); - else - kfree(mask); - } - } - if (deferred) call_rcu(&flow->rcu, rcu_free_flow_callback); else @@ -375,7 +357,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) static u32 flow_hash(const struct sw_flow_key *key, int key_start, int key_end) { - u32 *hash_key = (u32 *)((u8 *)key + key_start); + const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; /* Make sure number of hash bytes are multiple of u32. */ @@ -397,8 +379,8 @@ static bool cmp_key(const struct sw_flow_key *key1, const struct sw_flow_key *key2, int key_start, int key_end) { - const long *cp1 = (long *)((u8 *)key1 + key_start); - const long *cp2 = (long *)((u8 *)key2 + key_start); + const long *cp1 = (const long *)((const u8 *)key1 + key_start); + const long *cp2 = (const long *)((const u8 *)key2 + key_start); long diffs = 0; int i; @@ -474,6 +456,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); } +struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, + struct sw_flow_match *match) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); + struct sw_flow_mask *mask; + struct sw_flow *flow; + + /* Always called under ovs-mutex. */ + list_for_each_entry(mask, &tbl->mask_list, list) { + flow = masked_flow_lookup(ti, match->key, mask); + if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + return flow; + } + return NULL; +} + int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct sw_flow_mask *mask; @@ -490,6 +488,25 @@ static struct table_instance *table_instance_expand(struct table_instance *ti) return table_instance_rehash(ti, ti->n_buckets * 2); } +/* Remove 'mask' from the mask list, if it is not needed any more. */ +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) +{ + if (mask) { + /* ovs-lock is required to protect mask-refcount and + * mask list. + */ + ASSERT_OVSL(); + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + kfree_rcu(mask, rcu); + } + } +} + +/* Must be called with OVS mutex held. */ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); @@ -497,6 +514,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[ti->node_ver]); table->count--; + + /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be + * accessible as long as the RCU read lock is held. + */ + flow_mask_remove(table, flow->mask); } static struct sw_flow_mask *mask_alloc(void) @@ -513,8 +535,8 @@ static struct sw_flow_mask *mask_alloc(void) static bool mask_equal(const struct sw_flow_mask *a, const struct sw_flow_mask *b) { - u8 *a_ = (u8 *)&a->key + a->range.start; - u8 *b_ = (u8 *)&b->key + b->range.start; + const u8 *a_ = (const u8 *)&a->key + a->range.start; + const u8 *b_ = (const u8 *)&b->key + b->range.start; return (a->range.end == b->range.end) && (a->range.start == b->range.start) @@ -559,6 +581,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, return 0; } +/* Must be called with OVS mutex held. */ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, struct sw_flow_mask *mask) { @@ -597,16 +620,28 @@ int ovs_flow_init(void) BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, - 0, NULL); + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) + + (num_possible_nodes() + * sizeof(struct flow_stats *)), + 0, 0, NULL); if (flow_cache == NULL) return -ENOMEM; + flow_stats_cache + = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (flow_stats_cache == NULL) { + kmem_cache_destroy(flow_cache); + flow_cache = NULL; + return -ENOMEM; + } + return 0; } /* Uninitializes the flow module. */ void ovs_flow_exit(void) { + kmem_cache_destroy(flow_stats_cache); kmem_cache_destroy(flow_cache); } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index baaeb101924d..5918bff7f3f6 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -52,10 +52,12 @@ struct flow_table { unsigned int count; }; +extern struct kmem_cache *flow_stats_cache; + int ovs_flow_init(void); void ovs_flow_exit(void); -struct sw_flow *ovs_flow_alloc(bool percpu_stats); +struct sw_flow *ovs_flow_alloc(void); void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); @@ -74,7 +76,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, u32 *n_mask_hit); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); - +struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, + struct sw_flow_match *match); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index ebb6e2442554..f49148a07da2 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -110,6 +110,22 @@ static int gre_rcv(struct sk_buff *skb, return PACKET_RCVD; } +/* Called with rcu_read_lock and BH disabled. */ +static int gre_err(struct sk_buff *skb, u32 info, + const struct tnl_ptk_info *tpi) +{ + struct ovs_net *ovs_net; + struct vport *vport; + + ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); + vport = rcu_dereference(ovs_net->vport_net.gre_vport); + + if (unlikely(!vport)) + return PACKET_REJECT; + else + return PACKET_RCVD; +} + static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); @@ -172,7 +188,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, @@ -186,6 +202,7 @@ error: static struct gre_cisco_protocol gre_protocol = { .handler = gre_rcv, + .err_handler = gre_err, .priority = 1, }; @@ -256,7 +273,7 @@ static void gre_tnl_destroy(struct vport *vport) ovs_net = net_generic(net, ovs_net_id); - rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); + RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL); ovs_vport_deferred_free(vport); gre_exit(); } diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 729c68763fe7..84516126e5f3 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -26,6 +26,7 @@ #include <net/dst.h> #include <net/xfrm.h> +#include <net/rtnetlink.h> #include "datapath.h" #include "vport-internal_dev.h" @@ -121,6 +122,10 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_get_stats64 = internal_dev_get_stats, }; +static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { + .kind = "openvswitch", +}; + static void do_setup(struct net_device *netdev) { ether_setup(netdev); @@ -130,15 +135,19 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; - SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); + netdev->ethtool_ops = &internal_dev_ethtool_ops; + netdev->rtnl_link_ops = &internal_dev_link_ops; netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; + NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL; netdev->vlan_features = netdev->features; + netdev->hw_enc_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features = netdev->features & ~NETIF_F_LLTX; + eth_hw_addr_random(netdev); } @@ -159,7 +168,8 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) netdev_vport = netdev_vport_priv(vport); netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev), - parms->name, do_setup); + parms->name, NET_NAME_UNKNOWN, + do_setup); if (!netdev_vport->dev) { err = -ENOMEM; goto error_free_vport; @@ -248,3 +258,13 @@ struct vport *ovs_internal_dev_get_vport(struct net_device *netdev) return internal_dev_priv(netdev)->vport; } + +int ovs_internal_dev_rtnl_link_register(void) +{ + return rtnl_link_register(&internal_dev_link_ops); +} + +void ovs_internal_dev_rtnl_link_unregister(void) +{ + rtnl_link_unregister(&internal_dev_link_ops); +} diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h index 9a7d30ecc6a2..1b179a190cff 100644 --- a/net/openvswitch/vport-internal_dev.h +++ b/net/openvswitch/vport-internal_dev.h @@ -24,5 +24,7 @@ int ovs_is_internal_dev(const struct net_device *); struct vport *ovs_internal_dev_get_vport(struct net_device *); +int ovs_internal_dev_rtnl_link_register(void); +void ovs_internal_dev_rtnl_link_unregister(void); #endif /* vport-internal_dev.h */ diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index e797a50ac2be..d8b7e247bebf 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -122,7 +122,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false); + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -143,8 +143,6 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) struct rtable *rt; struct flowi4 fl; __be16 src_port; - int port_min; - int port_max; __be16 df; int err; @@ -170,17 +168,17 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; - inet_get_local_port_range(net, &port_min, &port_max); - src_port = vxlan_src_port(port_min, port_max, skb); + src_port = udp_flow_src_port(net, skb, 0, 0, true); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8)); + htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8), + false); if (err < 0) ip_rt_put(rt); error: diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 42c0f4a0b78c..702fb21bfe15 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -134,10 +134,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->dp = parms->dp; vport->port_no = parms->port_no; - vport->upcall_portid = parms->upcall_portid; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); + if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) + return ERR_PTR(-EINVAL); + vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!vport->percpu_stats) { kfree(vport); @@ -161,6 +163,10 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, */ void ovs_vport_free(struct vport *vport) { + /* vport is freed from RCU callback or error path, Therefore + * it is safe to use raw dereference. + */ + kfree(rcu_dereference_raw(vport->upcall_portids)); free_percpu(vport->percpu_stats); kfree(vport); } @@ -327,6 +333,99 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) } /** + * ovs_vport_set_upcall_portids - set upcall portids of @vport. + * + * @vport: vport to modify. + * @ids: new configuration, an array of port ids. + * + * Sets the vport's upcall_portids to @ids. + * + * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed + * as an array of U32. + * + * Must be called with ovs_mutex. + */ +int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids) +{ + struct vport_portids *old, *vport_portids; + + if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) + return -EINVAL; + + old = ovsl_dereference(vport->upcall_portids); + + vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids), + GFP_KERNEL); + if (!vport_portids) + return -ENOMEM; + + vport_portids->n_ids = nla_len(ids) / sizeof(u32); + vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids); + nla_memcpy(vport_portids->ids, ids, nla_len(ids)); + + rcu_assign_pointer(vport->upcall_portids, vport_portids); + + if (old) + kfree_rcu(old, rcu); + return 0; +} + +/** + * ovs_vport_get_upcall_portids - get the upcall_portids of @vport. + * + * @vport: vport from which to retrieve the portids. + * @skb: sk_buff where portids should be appended. + * + * Retrieves the configuration of the given vport, appending the + * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall + * portids to @skb. + * + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room. + * If an error occurs, @skb is left unmodified. Must be called with + * ovs_mutex or rcu_read_lock. + */ +int ovs_vport_get_upcall_portids(const struct vport *vport, + struct sk_buff *skb) +{ + struct vport_portids *ids; + + ids = rcu_dereference_ovsl(vport->upcall_portids); + + if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS) + return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID, + ids->n_ids * sizeof(u32), (void *)ids->ids); + else + return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]); +} + +/** + * ovs_vport_find_upcall_portid - find the upcall portid to send upcall. + * + * @vport: vport from which the missed packet is received. + * @skb: skb that the missed packet was received. + * + * Uses the skb_get_hash() to select the upcall portid to send the + * upcall. + * + * Returns the portid of the target socket. Must be called with rcu_read_lock. + */ +u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb) +{ + struct vport_portids *ids; + u32 ids_index; + u32 hash; + + ids = rcu_dereference(p->upcall_portids); + + if (ids->n_ids == 1 && ids->ids[0] == 0) + return 0; + + hash = skb_get_hash(skb); + ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids); + return ids->ids[ids_index]; +} + +/** * ovs_vport_receive - pass up received packet to the datapath for processing * * @vport: vport that received the packet diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index d7e50a17396c..35f89d84b45e 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/netlink.h> #include <linux/openvswitch.h> +#include <linux/reciprocal_div.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/u64_stats_sync.h> @@ -52,6 +53,10 @@ void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); int ovs_vport_set_options(struct vport *, struct nlattr *options); int ovs_vport_get_options(const struct vport *, struct sk_buff *); +int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids); +int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); +u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); + int ovs_vport_send(struct vport *, struct sk_buff *); /* The following definitions are for implementers of vport devices: */ @@ -62,13 +67,27 @@ struct vport_err_stats { u64 tx_dropped; u64 tx_errors; }; +/** + * struct vport_portids - array of netlink portids of a vport. + * must be protected by rcu. + * @rn_ids: The reciprocal value of @n_ids. + * @rcu: RCU callback head for deferred destruction. + * @n_ids: Size of @ids array. + * @ids: Array storing the Netlink socket pids to be used for packets received + * on this port that miss the flow table. + */ +struct vport_portids { + struct reciprocal_value rn_ids; + struct rcu_head rcu; + u32 n_ids; + u32 ids[]; +}; /** * struct vport - one port within a datapath * @rcu: RCU callback head for deferred destruction. * @dp: Datapath to which this port belongs. - * @upcall_portid: The Netlink port to use for packets received on this port that - * miss the flow table. + * @upcall_portids: RCU protected 'struct vport_portids'. * @port_no: Index into @dp's @ports array. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. @@ -80,7 +99,7 @@ struct vport_err_stats { struct vport { struct rcu_head rcu; struct datapath *dp; - u32 upcall_portid; + struct vport_portids __rcu *upcall_portids; u16 port_no; struct hlist_node hash_node; @@ -111,7 +130,7 @@ struct vport_parms { /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; - u32 upcall_portid; + struct nlattr *upcall_portids; }; /** @@ -172,7 +191,7 @@ void ovs_vport_deferred_free(struct vport *vport); */ static inline void *vport_priv(const struct vport *vport) { - return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); + return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); } /** @@ -185,9 +204,9 @@ static inline void *vport_priv(const struct vport *vport) * the result of a hash table lookup. @priv must point to the start of the * private data area. */ -static inline struct vport *vport_from_priv(const void *priv) +static inline struct vport *vport_from_priv(void *priv) { - return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); + return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); } void ovs_vport_receive(struct vport *, struct sk_buff *, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b85c67ccb797..8d9f8042705a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -441,14 +441,10 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); - if (shhwtstamps) { - if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) - return TP_STATUS_TS_SYS_HARDWARE; - if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) - return TP_STATUS_TS_RAW_HARDWARE; - } + if (shhwtstamps && + (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) + return TP_STATUS_TS_RAW_HARDWARE; if (ktime_to_timespec_cond(skb->tstamp, ts)) return TP_STATUS_TS_SOFTWARE; @@ -3071,10 +3067,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, break; case PACKET_MR_PROMISC: return dev_set_promiscuity(dev, what); - break; case PACKET_MR_ALLMULTI: return dev_set_allmulti(dev, what); - break; case PACKET_MR_UNICAST: if (i->alen != dev->addr_len) return -EINVAL; diff --git a/net/packet/diag.c b/net/packet/diag.c index 533ce4ff108a..92f2c7107eec 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -128,6 +128,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + bool may_report_filterinfo, struct user_namespace *user_ns, u32 portid, u32 seq, u32 flags, int sk_ino) { @@ -172,7 +173,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, goto out_nlmsg_trim; if ((req->pdiag_show & PACKET_SHOW_FILTER) && - sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER)) + sock_diag_put_filterinfo(may_report_filterinfo, sk, skb, + PACKET_DIAG_FILTER)) goto out_nlmsg_trim; return nlmsg_end(skb, nlh); @@ -188,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; + bool may_report_filterinfo; net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); + may_report_filterinfo = netlink_net_capable(cb->skb, CAP_NET_ADMIN); mutex_lock(&net->packet.sklist_lock); sk_for_each(sk, &net->packet.sklist) { @@ -200,6 +204,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (sk_diag_fill(sk, skb, req, + may_report_filterinfo, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 66dc65e7c6a1..e9a83a637185 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -267,7 +267,7 @@ int gprs_attach(struct sock *sk) return -EINVAL; /* need packet boundaries */ /* Create net device */ - dev = alloc_netdev(sizeof(*gp), ifname, gprs_setup); + dev = alloc_netdev(sizeof(*gp), ifname, NET_NAME_UNKNOWN, gprs_setup); if (!dev) return -ENOMEM; gp = netdev_priv(dev); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index dc15f4300808..b64151ade6b3 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -70,10 +70,10 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) int err; u8 pnaddr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; ASSERT_RTNL(); @@ -233,10 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) int err; u8 dst; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; ASSERT_RTNL(); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index b7ebe23cdedf..d67de453c35a 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -598,7 +598,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, { atomic64_set(&ic->i_ack_next, seq); if (ack_required) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); } } @@ -606,7 +606,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, static u64 rds_ib_get_ack(struct rds_ib_connection *ic) { clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return atomic64_read(&ic->i_ack_next); } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 37be6e226d1b..1dde91e3dc70 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -298,7 +298,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rds_ib_stats_inc(s_ib_tx_cq_event); if (wc.wr_id == RDS_IB_ACK_WR_ID) { - if (ic->i_ack_queued + HZ/2 < jiffies) + if (time_after(jiffies, ic->i_ack_queued + HZ/2)) rds_ib_stats_inc(s_ib_tx_stalled); rds_ib_ack_send_complete(ic); continue; @@ -315,7 +315,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rm = rds_ib_send_unmap_op(ic, send, wc.status); - if (send->s_queued + HZ/2 < jiffies) + if (time_after(jiffies, send->s_queued + HZ/2)) rds_ib_stats_inc(s_ib_tx_stalled); if (send->s_op) { diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 45033358358e..aa8bf6786008 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -429,7 +429,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, { atomic64_set(&ic->i_ack_next, seq); if (ack_required) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); } } @@ -437,7 +437,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, static u64 rds_iw_get_ack(struct rds_iw_connection *ic) { clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return atomic64_read(&ic->i_ack_next); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index e40c3c5db2c4..9105ea03aec5 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -232,7 +232,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) } if (wc.wr_id == RDS_IW_ACK_WR_ID) { - if (ic->i_ack_queued + HZ/2 < jiffies) + if (time_after(jiffies, ic->i_ack_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); rds_iw_ack_send_complete(ic); continue; @@ -267,7 +267,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) send->s_wr.opcode = 0xdead; send->s_wr.num_sge = 1; - if (send->s_queued + HZ/2 < jiffies) + if (time_after(jiffies, send->s_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); /* If a RDMA operation produced an error, signal this right diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index 89c91515ed0c..139239d2cb22 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -111,8 +111,7 @@ static struct ctl_table rds_iw_sysctl_table[] = { void rds_iw_sysctl_exit(void) { - if (rds_iw_sysctl_hdr) - unregister_net_sysctl_table(rds_iw_sysctl_hdr); + unregister_net_sysctl_table(rds_iw_sysctl_hdr); } int rds_iw_sysctl_init(void) diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index c2be901d19ee..6cd9d1deafc3 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -168,7 +168,7 @@ static int rds_rdma_listen_init(void) return ret; } - sin.sin_family = AF_INET, + sin.sin_family = AF_INET; sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); sin.sin_port = (__force u16)htons(RDS_PORT); diff --git a/net/rds/send.c b/net/rds/send.c index a82fb660ec00..23718160d71e 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -107,7 +107,7 @@ static int acquire_in_xmit(struct rds_connection *conn) static void release_in_xmit(struct rds_connection *conn) { clear_bit(RDS_IN_XMIT, &conn->c_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); /* * We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. We don't want to walk @@ -661,7 +661,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack, /* order flag updates with spin locks */ if (!list_empty(&list)) - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); spin_unlock_irqrestore(&conn->c_lock, flags); @@ -691,7 +691,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) } /* order flag updates with the rs lock */ - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); spin_unlock_irqrestore(&rs->rs_lock, flags); diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index b5cb2aa08f33..c3b0cd43eb56 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -94,8 +94,7 @@ static struct ctl_table rds_sysctl_rds_table[] = { void rds_sysctl_exit(void) { - if (rds_sysctl_reg_table) - unregister_net_sysctl_table(rds_sysctl_reg_table); + unregister_net_sysctl_table(rds_sysctl_reg_table); } int rds_sysctl_init(void) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 4e638f851185..23ab4dcd1d9f 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -153,7 +153,7 @@ int rds_tcp_listen_init(void) sock->sk->sk_data_ready = rds_tcp_listen_data_ready; write_unlock_bh(&sock->sk->sk_callback_lock); - sin.sin_family = PF_INET, + sin.sin_family = PF_INET; sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); sin.sin_port = (__force u16)htons(RDS_TCP_PORT); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 81cf5a4c5e40..53b17ca0dff5 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -93,7 +93,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, rm->m_ack_seq = tc->t_last_sent_nxt + sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags); tc->t_last_expected_una = rm->m_ack_seq + 1; diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index bd2a5b90400c..14c98e48f261 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -36,8 +36,6 @@ struct rfkill_gpio_data { struct gpio_desc *shutdown_gpio; struct rfkill *rfkill_dev; - char *reset_name; - char *shutdown_name; struct clk *clk; bool clk_enabled; @@ -47,17 +45,14 @@ static int rfkill_gpio_set_power(void *data, bool blocked) { struct rfkill_gpio_data *rfkill = data; - if (blocked) { - gpiod_set_value(rfkill->shutdown_gpio, 0); - gpiod_set_value(rfkill->reset_gpio, 0); - if (!IS_ERR(rfkill->clk) && rfkill->clk_enabled) - clk_disable(rfkill->clk); - } else { - if (!IS_ERR(rfkill->clk) && !rfkill->clk_enabled) - clk_enable(rfkill->clk); - gpiod_set_value(rfkill->reset_gpio, 1); - gpiod_set_value(rfkill->shutdown_gpio, 1); - } + if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled) + clk_enable(rfkill->clk); + + gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked); + gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked); + + if (blocked && !IS_ERR(rfkill->clk) && rfkill->clk_enabled) + clk_disable(rfkill->clk); rfkill->clk_enabled = blocked; @@ -87,10 +82,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev) { struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; struct rfkill_gpio_data *rfkill; - const char *clk_name = NULL; struct gpio_desc *gpio; int ret; - int len; rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL); if (!rfkill) @@ -101,28 +94,15 @@ static int rfkill_gpio_probe(struct platform_device *pdev) if (ret) return ret; } else if (pdata) { - clk_name = pdata->power_clk_name; rfkill->name = pdata->name; rfkill->type = pdata->type; } else { return -ENODEV; } - len = strlen(rfkill->name); - rfkill->reset_name = devm_kzalloc(&pdev->dev, len + 7, GFP_KERNEL); - if (!rfkill->reset_name) - return -ENOMEM; - - rfkill->shutdown_name = devm_kzalloc(&pdev->dev, len + 10, GFP_KERNEL); - if (!rfkill->shutdown_name) - return -ENOMEM; + rfkill->clk = devm_clk_get(&pdev->dev, NULL); - snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name); - snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name); - - rfkill->clk = devm_clk_get(&pdev->dev, clk_name); - - gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0); + gpio = devm_gpiod_get_index(&pdev->dev, "reset", 0); if (!IS_ERR(gpio)) { ret = gpiod_direction_output(gpio, 0); if (ret) @@ -130,7 +110,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->reset_gpio = gpio; } - gpio = devm_gpiod_get_index(&pdev->dev, rfkill->shutdown_name, 1); + gpio = devm_gpiod_get_index(&pdev->dev, "shutdown", 1); if (!IS_ERR(gpio)) { ret = gpiod_direction_output(gpio, 0); if (ret) @@ -146,14 +126,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } - if (pdata && pdata->gpio_runtime_setup) { - ret = pdata->gpio_runtime_setup(pdev); - if (ret) { - dev_err(&pdev->dev, "can't set up gpio\n"); - return ret; - } - } - rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); @@ -174,20 +146,23 @@ static int rfkill_gpio_probe(struct platform_device *pdev) static int rfkill_gpio_remove(struct platform_device *pdev) { struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev); - struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; - if (pdata && pdata->gpio_runtime_close) - pdata->gpio_runtime_close(pdev); rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); return 0; } +#ifdef CONFIG_ACPI static const struct acpi_device_id rfkill_acpi_match[] = { + { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E39", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, + { "LNV4752", RFKILL_TYPE_GPS }, { }, }; +#endif static struct platform_driver rfkill_gpio_driver = { .probe = rfkill_gpio_probe, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8451c8cdc9de..a85c1a086ae4 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1538,7 +1538,7 @@ static int __init rose_proto_init(void) char name[IFNAMSIZ]; sprintf(name, "rose%d", i); - dev = alloc_netdev(0, name, rose_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, rose_setup); if (!dev) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n"); rc = -ENOMEM; diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 7633a752c65e..b45d080e64a7 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -26,8 +26,10 @@ #include "ar-internal.h" static int rxrpc_vet_description_s(const char *); -static int rxrpc_instantiate(struct key *, struct key_preparsed_payload *); -static int rxrpc_instantiate_s(struct key *, struct key_preparsed_payload *); +static int rxrpc_preparse(struct key_preparsed_payload *); +static int rxrpc_preparse_s(struct key_preparsed_payload *); +static void rxrpc_free_preparse(struct key_preparsed_payload *); +static void rxrpc_free_preparse_s(struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); @@ -39,7 +41,9 @@ static long rxrpc_read(const struct key *, char __user *, size_t); */ struct key_type key_type_rxrpc = { .name = "rxrpc", - .instantiate = rxrpc_instantiate, + .preparse = rxrpc_preparse, + .free_preparse = rxrpc_free_preparse, + .instantiate = generic_key_instantiate, .match = user_match, .destroy = rxrpc_destroy, .describe = rxrpc_describe, @@ -54,7 +58,9 @@ EXPORT_SYMBOL(key_type_rxrpc); struct key_type key_type_rxrpc_s = { .name = "rxrpc_s", .vet_description = rxrpc_vet_description_s, - .instantiate = rxrpc_instantiate_s, + .preparse = rxrpc_preparse_s, + .free_preparse = rxrpc_free_preparse_s, + .instantiate = generic_key_instantiate, .match = user_match, .destroy = rxrpc_destroy_s, .describe = rxrpc_describe, @@ -81,13 +87,13 @@ static int rxrpc_vet_description_s(const char *desc) * parse an RxKAD type XDR format token * - the caller guarantees we have at least 4 words */ -static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr, - unsigned int toklen) +static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, + size_t datalen, + const __be32 *xdr, unsigned int toklen) { struct rxrpc_key_token *token, **pptoken; size_t plen; u32 tktlen; - int ret; _enter(",{%x,%x,%x,%x},%u", ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), @@ -99,13 +105,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr, _debug("tktlen: %x", tktlen); if (tktlen > AFSTOKEN_RK_TIX_MAX) return -EKEYREJECTED; - if (8 * 4 + tktlen != toklen) + if (toklen < 8 * 4 + tktlen) return -EKEYREJECTED; plen = sizeof(*token) + sizeof(*token->kad) + tktlen; - ret = key_payload_reserve(key, key->datalen + plen); - if (ret < 0) - return ret; + prep->quotalen = datalen + plen; plen -= sizeof(*token); token = kzalloc(sizeof(*token), GFP_KERNEL); @@ -146,16 +150,16 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr, token->kad->ticket[6], token->kad->ticket[7]); /* count the number of tokens attached */ - key->type_data.x[0]++; + prep->type_data[0] = (void *)((unsigned long)prep->type_data[0] + 1); /* attach the data */ - for (pptoken = (struct rxrpc_key_token **)&key->payload.data; + for (pptoken = (struct rxrpc_key_token **)&prep->payload[0]; *pptoken; pptoken = &(*pptoken)->next) continue; *pptoken = token; - if (token->kad->expiry < key->expiry) - key->expiry = token->kad->expiry; + if (token->kad->expiry < prep->expiry) + prep->expiry = token->kad->expiry; _leave(" = 0"); return 0; @@ -346,7 +350,7 @@ static int rxrpc_krb5_decode_tagged_array(struct krb5_tagged_data **_td, n_elem = ntohl(*xdr++); toklen -= 4; - if (n_elem < 0 || n_elem > max_n_elem) + if (n_elem > max_n_elem) return -EINVAL; *_n_elem = n_elem; if (n_elem > 0) { @@ -418,8 +422,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, * parse an RxK5 type XDR format token * - the caller guarantees we have at least 4 words */ -static int rxrpc_instantiate_xdr_rxk5(struct key *key, const __be32 *xdr, - unsigned int toklen) +static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep, + size_t datalen, + const __be32 *xdr, unsigned int toklen) { struct rxrpc_key_token *token, **pptoken; struct rxk5_key *rxk5; @@ -432,9 +437,7 @@ static int rxrpc_instantiate_xdr_rxk5(struct key *key, const __be32 *xdr, /* reserve some payload space for this subkey - the length of the token * is a reasonable approximation */ - ret = key_payload_reserve(key, key->datalen + toklen); - if (ret < 0) - return ret; + prep->quotalen = datalen + toklen; token = kzalloc(sizeof(*token), GFP_KERNEL); if (!token) @@ -520,14 +523,14 @@ static int rxrpc_instantiate_xdr_rxk5(struct key *key, const __be32 *xdr, if (toklen != 0) goto inval; - /* attach the payload to the key */ - for (pptoken = (struct rxrpc_key_token **)&key->payload.data; + /* attach the payload */ + for (pptoken = (struct rxrpc_key_token **)&prep->payload[0]; *pptoken; pptoken = &(*pptoken)->next) continue; *pptoken = token; - if (token->kad->expiry < key->expiry) - key->expiry = token->kad->expiry; + if (token->kad->expiry < prep->expiry) + prep->expiry = token->kad->expiry; _leave(" = 0"); return 0; @@ -545,16 +548,17 @@ error: * attempt to parse the data as the XDR format * - the caller guarantees we have more than 7 words */ -static int rxrpc_instantiate_xdr(struct key *key, const void *data, size_t datalen) +static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) { - const __be32 *xdr = data, *token; + const __be32 *xdr = prep->data, *token; const char *cp; unsigned int len, tmp, loop, ntoken, toklen, sec_ix; + size_t datalen = prep->datalen; int ret; _enter(",{%x,%x,%x,%x},%zu", ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), - datalen); + prep->datalen); if (datalen > AFSTOKEN_LENGTH_MAX) goto not_xdr; @@ -635,13 +639,13 @@ static int rxrpc_instantiate_xdr(struct key *key, const void *data, size_t datal switch (sec_ix) { case RXRPC_SECURITY_RXKAD: - ret = rxrpc_instantiate_xdr_rxkad(key, xdr, toklen); + ret = rxrpc_preparse_xdr_rxkad(prep, datalen, xdr, toklen); if (ret != 0) goto error; break; case RXRPC_SECURITY_RXK5: - ret = rxrpc_instantiate_xdr_rxk5(key, xdr, toklen); + ret = rxrpc_preparse_xdr_rxk5(prep, datalen, xdr, toklen); if (ret != 0) goto error; break; @@ -665,8 +669,9 @@ error: } /* - * instantiate an rxrpc defined key - * data should be of the form: + * Preparse an rxrpc defined key. + * + * Data should be of the form: * OFFSET LEN CONTENT * 0 4 key interface version number * 4 2 security index (type) @@ -678,7 +683,7 @@ error: * * if no data is provided, then a no-security key is made */ -static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep) +static int rxrpc_preparse(struct key_preparsed_payload *prep) { const struct rxrpc_key_data_v1 *v1; struct rxrpc_key_token *token, **pp; @@ -686,7 +691,7 @@ static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep u32 kver; int ret; - _enter("{%x},,%zu", key_serial(key), prep->datalen); + _enter("%zu", prep->datalen); /* handle a no-security key */ if (!prep->data && prep->datalen == 0) @@ -694,7 +699,7 @@ static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep /* determine if the XDR payload format is being used */ if (prep->datalen > 7 * 4) { - ret = rxrpc_instantiate_xdr(key, prep->data, prep->datalen); + ret = rxrpc_preparse_xdr(prep); if (ret != -EPROTO) return ret; } @@ -743,9 +748,7 @@ static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep goto error; plen = sizeof(*token->kad) + v1->ticket_length; - ret = key_payload_reserve(key, plen + sizeof(*token)); - if (ret < 0) - goto error; + prep->quotalen = plen + sizeof(*token); ret = -ENOMEM; token = kzalloc(sizeof(*token), GFP_KERNEL); @@ -762,15 +765,16 @@ static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep memcpy(&token->kad->session_key, &v1->session_key, 8); memcpy(&token->kad->ticket, v1->ticket, v1->ticket_length); - /* attach the data */ - key->type_data.x[0]++; + /* count the number of tokens attached */ + prep->type_data[0] = (void *)((unsigned long)prep->type_data[0] + 1); - pp = (struct rxrpc_key_token **)&key->payload.data; + /* attach the data */ + pp = (struct rxrpc_key_token **)&prep->payload[0]; while (*pp) pp = &(*pp)->next; *pp = token; - if (token->kad->expiry < key->expiry) - key->expiry = token->kad->expiry; + if (token->kad->expiry < prep->expiry) + prep->expiry = token->kad->expiry; token = NULL; ret = 0; @@ -781,20 +785,55 @@ error: } /* - * instantiate a server secret key - * data should be a pointer to the 8-byte secret key + * Free token list. */ -static int rxrpc_instantiate_s(struct key *key, - struct key_preparsed_payload *prep) +static void rxrpc_free_token_list(struct rxrpc_key_token *token) +{ + struct rxrpc_key_token *next; + + for (; token; token = next) { + next = token->next; + switch (token->security_index) { + case RXRPC_SECURITY_RXKAD: + kfree(token->kad); + break; + case RXRPC_SECURITY_RXK5: + if (token->k5) + rxrpc_rxk5_free(token->k5); + break; + default: + printk(KERN_ERR "Unknown token type %x on rxrpc key\n", + token->security_index); + BUG(); + } + + kfree(token); + } +} + +/* + * Clean up preparse data. + */ +static void rxrpc_free_preparse(struct key_preparsed_payload *prep) +{ + rxrpc_free_token_list(prep->payload[0]); +} + +/* + * Preparse a server secret key. + * + * The data should be the 8-byte secret key. + */ +static int rxrpc_preparse_s(struct key_preparsed_payload *prep) { struct crypto_blkcipher *ci; - _enter("{%x},,%zu", key_serial(key), prep->datalen); + _enter("%zu", prep->datalen); if (prep->datalen != 8) return -EINVAL; - memcpy(&key->type_data, prep->data, 8); + memcpy(&prep->type_data, prep->data, 8); ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(ci)) { @@ -805,36 +844,26 @@ static int rxrpc_instantiate_s(struct key *key, if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) BUG(); - key->payload.data = ci; + prep->payload[0] = ci; _leave(" = 0"); return 0; } /* + * Clean up preparse data. + */ +static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) +{ + if (prep->payload[0]) + crypto_free_blkcipher(prep->payload[0]); +} + +/* * dispose of the data dangling from the corpse of a rxrpc key */ static void rxrpc_destroy(struct key *key) { - struct rxrpc_key_token *token; - - while ((token = key->payload.data)) { - key->payload.data = token->next; - switch (token->security_index) { - case RXRPC_SECURITY_RXKAD: - kfree(token->kad); - break; - case RXRPC_SECURITY_RXK5: - if (token->k5) - rxrpc_rxk5_free(token->k5); - break; - default: - printk(KERN_ERR "Unknown token type %x on rxrpc key\n", - token->security_index); - BUG(); - } - - kfree(token); - } + rxrpc_free_token_list(key->payload.data); } /* diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8a5ba5add4bc..648778aef1a2 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -948,7 +948,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; - if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 4f912c0e225b..eb48306033d9 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -218,10 +218,12 @@ static int mirred_device_event(struct notifier_block *unused, if (event == NETDEV_UNREGISTER) list_for_each_entry(m, &mirred_list, tcfm_list) { + spin_lock_bh(&m->tcf_lock); if (m->tcfm_dev == dev) { dev_put(dev); m->tcfm_dev = NULL; } + spin_unlock_bh(&m->tcf_lock); } return NOTIFY_DONE; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 29a30a14c315..c28b0d327b12 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -134,7 +134,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) int err; int tp_created = 0; - if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTFILTER) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: @@ -317,7 +318,8 @@ replay: } } - err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh); + err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, + n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); if (err == 0) { if (tp_created) { spin_lock_bh(root_lock); @@ -504,7 +506,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) { #ifdef CONFIG_NET_CLS_ACT { @@ -513,7 +515,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, INIT_LIST_HEAD(&exts->actions); if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", TCA_ACT_NOREPLACE, + "police", ovr, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -523,7 +525,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } else if (exts->action && tb[exts->action]) { int err; err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, TCA_ACT_NOREPLACE, + NULL, ovr, TCA_ACT_BIND, &exts->actions); if (err) return err; @@ -543,14 +545,12 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT - if (!list_empty(&src->actions)) { - LIST_HEAD(tmp); - tcf_tree_lock(tp); - list_splice_init(&dst->actions, &tmp); - list_splice(&src->actions, &dst->actions); - tcf_tree_unlock(tp); - tcf_action_destroy(&tmp, TCA_ACT_UNBIND); - } + LIST_HEAD(tmp); + tcf_tree_lock(tp); + list_splice_init(&dst->actions, &tmp); + list_splice(&src->actions, &dst->actions); + tcf_tree_unlock(tp); + tcf_action_destroy(&tmp, TCA_ACT_UNBIND); #endif } EXPORT_SYMBOL(tcf_exts_change); @@ -561,13 +561,14 @@ EXPORT_SYMBOL(tcf_exts_change); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT + struct nlattr *nest; + if (exts->action && !list_empty(&exts->actions)) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ - struct nlattr *nest; if (exts->type != TCA_OLD_COMPAT) { nest = nla_nest_start(skb, exts->action); if (nest == NULL) @@ -585,10 +586,14 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) nla_nest_end(skb, nest); } } -#endif return 0; -nla_put_failure: __attribute__ ((unused)) + +nla_put_failure: + nla_nest_cancel(skb, nest); return -1; +#else + return 0; +#endif } EXPORT_SYMBOL(tcf_exts_dump); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index e98ca99c202b..0ae1813e3e90 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -130,14 +130,14 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err; struct tcf_exts e; struct tcf_ematch_tree t; tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -161,7 +161,7 @@ errout: static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, bool ovr) { int err; struct basic_head *head = tp->root; @@ -179,7 +179,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (f != NULL) { if (handle && f->handle != handle) return -EINVAL; - return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); + return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); } err = -ENOBUFS; @@ -206,7 +206,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, f->handle = head->hgenerator; } - err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]); + err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 8e3cf49118e3..0e30d58149da 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -30,7 +30,7 @@ struct cls_bpf_head { }; struct cls_bpf_prog { - struct sk_filter *filter; + struct bpf_prog *filter; struct sock_filter *bpf_ops; struct tcf_exts exts; struct tcf_result res; @@ -54,7 +54,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, int ret; list_for_each_entry(prog, &head->plist, link) { - int filter_res = SK_RUN_FILTER(prog->filter, skb); + int filter_res = BPF_PROG_RUN(prog->filter, skb); if (filter_res == 0) continue; @@ -92,7 +92,7 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) tcf_unbind_filter(tp, &prog->res); tcf_exts_destroy(tp, &prog->exts); - sk_unattached_filter_destroy(prog->filter); + bpf_prog_destroy(prog->filter); kfree(prog->bpf_ops); kfree(prog); @@ -156,12 +156,12 @@ static void cls_bpf_put(struct tcf_proto *tp, unsigned long f) static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; - struct sock_fprog tmp; - struct sk_filter *fp, *fp_old; + struct sock_fprog_kern tmp; + struct bpf_prog *fp, *fp_old; u16 bpf_size, bpf_len; u32 classid; int ret; @@ -170,7 +170,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return -EINVAL; tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts); + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); if (ret < 0) return ret; @@ -191,9 +191,9 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); tmp.len = bpf_len; - tmp.filter = (struct sock_filter __user *) bpf_ops; + tmp.filter = bpf_ops; - ret = sk_unattached_filter_create(&fp, &tmp); + ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; @@ -211,7 +211,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); if (fp_old) - sk_unattached_filter_destroy(fp_old); + bpf_prog_destroy(fp_old); if (bpf_old) kfree(bpf_old); @@ -242,7 +242,7 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct cls_bpf_head *head = tp->root; struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg; @@ -260,7 +260,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (handle && prog->handle != handle) return -EINVAL; return cls_bpf_modify_existing(net, tp, prog, base, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } prog = kzalloc(sizeof(*prog), GFP_KERNEL); @@ -277,7 +277,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]); + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr); if (ret < 0) goto errout; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 8e2158ab551c..cacf01bd04f0 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -83,7 +83,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = tp->root; @@ -119,7 +119,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, return err; tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 257029c54332..35be16f7c192 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -349,7 +349,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct flow_head *head = tp->root; struct flow_filter *f; @@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, } tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 63a3ce75c02e..861b03ccfed0 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -169,7 +169,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base) + struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) { struct fw_head *head = tp->root; struct tcf_exts e; @@ -177,7 +177,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, int err; tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; @@ -218,7 +218,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *) *arg; @@ -236,7 +236,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f != NULL) { if (f->id != handle && handle) return -EINVAL; - return fw_change_attrs(net, tp, f, tb, tca, base); + return fw_change_attrs(net, tp, f, tb, tca, base, ovr); } if (!handle) @@ -264,7 +264,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); f->id = handle; - err = fw_change_attrs(net, tp, f, tb, tca, base); + err = fw_change_attrs(net, tp, f, tb, tca, base, ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1ad3068f2ce1..dd9fc2523c76 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -333,7 +333,8 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { static int route4_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct route4_filter *f, u32 handle, struct route4_head *head, - struct nlattr **tb, struct nlattr *est, int new) + struct nlattr **tb, struct nlattr *est, int new, + bool ovr) { int err; u32 id = 0, to = 0, nhandle = 0x8000; @@ -343,7 +344,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -428,7 +429,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct route4_head *head = tp->root; struct route4_filter *f, *f1, **fp; @@ -455,7 +456,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, old_handle = f->handle; err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], 0); + tca[TCA_RATE], 0, ovr); if (err < 0) return err; @@ -479,7 +480,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], 1); + tca[TCA_RATE], 1, ovr); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 19f8e5dfa8bd..1020e233a5d6 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -415,7 +415,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct rsvp_head *data = tp->root; struct rsvp_filter *f, **fp; @@ -436,7 +436,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, return err; tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index eed8404443d8..3e9f76413b3b 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -188,11 +188,17 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, }; +static void tcindex_filter_result_init(struct tcindex_filter_result *r) +{ + memset(r, 0, sizeof(*r)); + tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; @@ -202,20 +208,16 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcf_exts e; tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; memcpy(&cp, p, sizeof(cp)); - memset(&new_filter_result, 0, sizeof(new_filter_result)); - tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + tcindex_filter_result_init(&new_filter_result); + tcindex_filter_result_init(&cr); if (old_r) - memcpy(&cr, r, sizeof(cr)); - else { - memset(&cr, 0, sizeof(cr)); - tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - } + cr.res = r->res; if (tb[TCA_TCINDEX_HASH]) cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -267,9 +269,14 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp.perfect && !cp.h) { if (valid_perfect_hash(&cp)) { + int i; + cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL); if (!cp.perfect) goto errout; + for (i = 0; i < cp.hash; i++) + tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT, + TCA_TCINDEX_POLICE); balloc = 1; } else { cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL); @@ -295,14 +302,17 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, tcf_bind_filter(tp, &cr.res, base); } - tcf_exts_change(tp, &cr.exts, &e); + if (old_r) + tcf_exts_change(tp, &r->exts, &e); + else + tcf_exts_change(tp, &cr.exts, &e); tcf_tree_lock(tp); if (old_r && old_r != r) - memset(old_r, 0, sizeof(*old_r)); + tcindex_filter_result_init(old_r); memcpy(p, &cp, sizeof(cp)); - memcpy(r, &cr, sizeof(cr)); + r->res = cr.res; if (r == &new_filter_result) { struct tcindex_filter **fp; @@ -331,7 +341,7 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, bool ovr) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -351,7 +361,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, return err; return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } @@ -410,7 +420,7 @@ static void tcindex_destroy(struct tcf_proto *tp) pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); walker.count = 0; walker.skip = 0; - walker.fn = &tcindex_destroy_element; + walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); kfree(p->perfect); kfree(p->h); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 84c28daff848..70c0be8d0121 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -38,6 +38,7 @@ #include <linux/errno.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> +#include <linux/bitmap.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> @@ -460,17 +461,25 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) return 0; } +#define NR_U32_NODE (1<<12) static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) { struct tc_u_knode *n; - unsigned int i = 0x7FF; + unsigned long i; + unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), + GFP_KERNEL); + if (!bitmap) + return handle | 0xFFF; for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next) - if (i < TC_U32_NODE(n->handle)) - i = TC_U32_NODE(n->handle); - i++; + set_bit(TC_U32_NODE(n->handle), bitmap); - return handle | (i > 0xFFF ? 0xFFF : i); + i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800); + if (i >= NR_U32_NODE) + i = find_next_zero_bit(bitmap, NR_U32_NODE, 1); + + kfree(bitmap); + return handle | (i >= NR_U32_NODE ? 0xFFF : i); } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { @@ -486,13 +495,13 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_hnode *ht, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est) + struct nlattr *est, bool ovr) { int err; struct tcf_exts e; tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) return err; @@ -545,7 +554,7 @@ errout: static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -569,7 +578,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; return u32_set_parms(net, tp, base, n->ht_up, n, tb, - tca[TCA_RATE]); + tca[TCA_RATE], ovr); } if (tb[TCA_U32_DIVISOR]) { @@ -656,7 +665,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]); + err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr); if (err == 0) { struct tc_u_knode **ins; for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c index bfd34e4c1afc..7c292d474f47 100644 --- a/net/sched/em_canid.c +++ b/net/sched/em_canid.c @@ -125,7 +125,6 @@ static int em_canid_change(struct tcf_proto *tp, void *data, int len, { struct can_filter *conf = data; /* Array with rules */ struct canid_match *cm; - struct canid_match *cm_old = (struct canid_match *)m->data; int i; if (!len) @@ -181,12 +180,6 @@ static int em_canid_change(struct tcf_proto *tp, void *data, int len, m->datalen = sizeof(struct canid_match) + len; m->data = (unsigned long)cm; - - if (cm_old != NULL) { - pr_err("canid: Configuring an existing ematch!\n"); - kfree(cm_old); - } - return 0; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a0b84e0e22de..58bed7599db7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -563,7 +563,7 @@ out: } EXPORT_SYMBOL(__qdisc_calculate_pkt_len); -void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) +void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) { if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", @@ -1084,7 +1084,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *p = NULL; int err; - if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETQDISC) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1151,7 +1152,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *q, *p; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: @@ -1490,7 +1491,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) u32 qid; int err; - if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTCLASS) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 2aee02802c27..ed30e436128b 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -391,12 +391,7 @@ static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { static void choke_free(void *addr) { - if (addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); - } + kvfree(addr); } static int choke_change(struct Qdisc *sch, struct nlattr *opt) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 8302717ea303..7bbbfe112192 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -391,8 +391,10 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) while (1) { cl = list_first_entry(&q->active, struct drr_class, alist); skb = cl->qdisc->ops->peek(cl->qdisc); - if (skb == NULL) + if (skb == NULL) { + qdisc_warn_nonwc(__func__, cl->qdisc); goto out; + } len = qdisc_pkt_len(skb); if (len <= cl->deficit) { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 23c682b42f99..ba32c2b005d0 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -591,10 +591,7 @@ static void *fq_alloc_node(size_t sz, int node) static void fq_free(void *addr) { - if (addr && is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); + kvfree(addr); } static int fq_resize(struct Qdisc *sch, u32 log) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 0bf432c782c1..063b726bf1f8 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -365,12 +365,7 @@ static void *fq_codel_zalloc(size_t sz) static void fq_codel_free(void *addr) { - if (addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); - } + kvfree(addr); } static void fq_codel_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e1543b03e39d..fc04fe93c2da 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -108,7 +108,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, /* * Transmit one skb, and handle the return status as required. Holding the - * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this + * __QDISC___STATE_RUNNING bit guarantees that only one CPU can execute this * function. * * Returns to the caller: @@ -156,7 +156,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, /* * NOTE: Called under qdisc_lock(q) with locally disabled BH. * - * __QDISC_STATE_RUNNING guarantees only one CPU can process + * __QDISC___STATE_RUNNING guarantees only one CPU can process * this qdisc at a time. qdisc_lock(q) serializes queue accesses for * this queue. * diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index edee03d922e2..d85b6812a7d4 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -414,7 +414,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) } bucket->deficit = weight * q->quantum; } - if (++sch->q.qlen < sch->limit) + if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; q->drop_overlimit++; @@ -494,12 +494,7 @@ static void *hhf_zalloc(size_t sz) static void hhf_free(void *addr) { - if (addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); - } + kvfree(addr); } static void hhf_destroy(struct Qdisc *sch) @@ -553,11 +548,6 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; - sch_tree_lock(sch); - - if (tb[TCA_HHF_BACKLOG_LIMIT]) - sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); - if (tb[TCA_HHF_QUANTUM]) new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]); @@ -567,6 +557,12 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; if (non_hh_quantum > INT_MAX) return -EINVAL; + + sch_tree_lock(sch); + + if (tb[TCA_HHF_BACKLOG_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); + q->quantum = new_quantum; q->hhf_non_hh_weight = new_hhf_non_hh_weight; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f1669a00f571..111d70fddaea 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -648,12 +648,7 @@ static void netem_reset(struct Qdisc *sch) static void dist_free(struct disttable *d) { - if (d) { - if (is_vmalloc_addr(d)) - vfree(d); - else - kfree(d); - } + kvfree(d); } /* diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 87317ff0b4ec..1af2f73906d0 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -716,12 +716,7 @@ static void *sfq_alloc(size_t sz) static void sfq_free(void *addr) { - if (addr) { - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); - } + kvfree(addr); } static void sfq_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 474167162947..bd33793b527e 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -485,8 +485,8 @@ static int __init teql_init(void) struct net_device *dev; struct teql_master *master; - dev = alloc_netdev(sizeof(struct teql_master), - "teql%d", teql_master_setup); + dev = alloc_netdev(sizeof(struct teql_master), "teql%d", + NET_NAME_UNKNOWN, teql_master_setup); if (!dev) { err = -ENOMEM; break; diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 5c30b7a873df..3b4ffb021cf1 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ - inqueue.o outqueue.o ulpqueue.o command.o \ + inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o ssnmap.o auth.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 39579c3e0d14..06a9ee6b2d3a 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -55,6 +55,7 @@ #include <net/sctp/sm.h> /* Forward declarations for internal functions. */ +static void sctp_select_active_and_retran_path(struct sctp_association *asoc); static void sctp_assoc_bh_rcv(struct work_struct *work); static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc); @@ -330,7 +331,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Only real associations count against the endpoint, so * don't bother for if this is a temporary association. */ - if (!asoc->temp) { + if (!list_empty(&asoc->asocs)) { list_del(&asoc->asocs); /* Decrement the backlog value for a TCP-style listening @@ -774,9 +775,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, sctp_transport_cmd_t command, sctp_sn_error_t error) { - struct sctp_transport *t = NULL; - struct sctp_transport *first; - struct sctp_transport *second; struct sctp_ulpevent *event; struct sockaddr_storage addr; int spc_state = 0; @@ -829,13 +827,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, return; } - /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the - * user. + /* Generate and send a SCTP_PEER_ADDR_CHANGE notification + * to the user. */ if (ulp_notify) { memset(&addr, 0, sizeof(struct sockaddr_storage)); memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, spc_state, error, GFP_ATOMIC); if (event) @@ -843,60 +842,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, } /* Select new active and retran paths. */ - - /* Look for the two most recently used active transports. - * - * This code produces the wrong ordering whenever jiffies - * rolls over, but we still get usable transports, so we don't - * worry about it. - */ - first = NULL; second = NULL; - - list_for_each_entry(t, &asoc->peer.transport_addr_list, - transports) { - - if ((t->state == SCTP_INACTIVE) || - (t->state == SCTP_UNCONFIRMED) || - (t->state == SCTP_PF)) - continue; - if (!first || t->last_time_heard > first->last_time_heard) { - second = first; - first = t; - } else if (!second || - t->last_time_heard > second->last_time_heard) - second = t; - } - - /* RFC 2960 6.4 Multi-Homed SCTP Endpoints - * - * By default, an endpoint should always transmit to the - * primary path, unless the SCTP user explicitly specifies the - * destination transport address (and possibly source - * transport address) to use. - * - * [If the primary is active but not most recent, bump the most - * recently used transport.] - */ - if (((asoc->peer.primary_path->state == SCTP_ACTIVE) || - (asoc->peer.primary_path->state == SCTP_UNKNOWN)) && - first != asoc->peer.primary_path) { - second = first; - first = asoc->peer.primary_path; - } - - if (!second) - second = first; - /* If we failed to find a usable transport, just camp on the - * primary, even if it is inactive. - */ - if (!first) { - first = asoc->peer.primary_path; - second = asoc->peer.primary_path; - } - - /* Set the active and retran transports. */ - asoc->peer.active_path = first; - asoc->peer.retran_path = second; + sctp_select_active_and_retran_path(asoc); } /* Hold a reference to an association. */ @@ -1090,7 +1036,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) } if (chunk->transport) - chunk->transport->last_time_heard = jiffies; + chunk->transport->last_time_heard = ktime_get(); /* Run through the state machine. */ error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, @@ -1151,6 +1097,7 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->c = new->c; asoc->peer.rwnd = new->peer.rwnd; asoc->peer.sack_needed = new->peer.sack_needed; + asoc->peer.auth_capable = new->peer.auth_capable; asoc->peer.i = new->peer.i; sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, asoc->peer.i.initial_tsn, GFP_ATOMIC); @@ -1278,13 +1225,41 @@ static u8 sctp_trans_score(const struct sctp_transport *trans) return sctp_trans_state_to_prio_map[trans->state]; } +static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1, + struct sctp_transport *trans2) +{ + if (trans1->error_count > trans2->error_count) { + return trans2; + } else if (trans1->error_count == trans2->error_count && + ktime_after(trans2->last_time_heard, + trans1->last_time_heard)) { + return trans2; + } else { + return trans1; + } +} + static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr, struct sctp_transport *best) { + u8 score_curr, score_best; + if (best == NULL) return curr; - return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best; + score_curr = sctp_trans_score(curr); + score_best = sctp_trans_score(best); + + /* First, try a score-based selection if both transport states + * differ. If we're in a tie, lets try to make a more clever + * decision here based on error counts and last time heard. + */ + if (score_curr > score_best) + return curr; + else if (score_curr == score_best) + return sctp_trans_elect_tie(curr, best); + else + return best; } void sctp_assoc_update_retran_path(struct sctp_association *asoc) @@ -1325,6 +1300,76 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) __func__, asoc, &asoc->peer.retran_path->ipaddr.sa); } +static void sctp_select_active_and_retran_path(struct sctp_association *asoc) +{ + struct sctp_transport *trans, *trans_pri = NULL, *trans_sec = NULL; + struct sctp_transport *trans_pf = NULL; + + /* Look for the two most recently used active transports. */ + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + /* Skip uninteresting transports. */ + if (trans->state == SCTP_INACTIVE || + trans->state == SCTP_UNCONFIRMED) + continue; + /* Keep track of the best PF transport from our + * list in case we don't find an active one. + */ + if (trans->state == SCTP_PF) { + trans_pf = sctp_trans_elect_best(trans, trans_pf); + continue; + } + /* For active transports, pick the most recent ones. */ + if (trans_pri == NULL || + ktime_after(trans->last_time_heard, + trans_pri->last_time_heard)) { + trans_sec = trans_pri; + trans_pri = trans; + } else if (trans_sec == NULL || + ktime_after(trans->last_time_heard, + trans_sec->last_time_heard)) { + trans_sec = trans; + } + } + + /* RFC 2960 6.4 Multi-Homed SCTP Endpoints + * + * By default, an endpoint should always transmit to the primary + * path, unless the SCTP user explicitly specifies the + * destination transport address (and possibly source transport + * address) to use. [If the primary is active but not most recent, + * bump the most recently used transport.] + */ + if ((asoc->peer.primary_path->state == SCTP_ACTIVE || + asoc->peer.primary_path->state == SCTP_UNKNOWN) && + asoc->peer.primary_path != trans_pri) { + trans_sec = trans_pri; + trans_pri = asoc->peer.primary_path; + } + + /* We did not find anything useful for a possible retransmission + * path; either primary path that we found is the the same as + * the current one, or we didn't generally find an active one. + */ + if (trans_sec == NULL) + trans_sec = trans_pri; + + /* If we failed to find a usable transport, just camp on the + * primary or retran, even if they are inactive, if possible + * pick a PF iff it's the better choice. + */ + if (trans_pri == NULL) { + trans_pri = sctp_trans_elect_best(asoc->peer.primary_path, + asoc->peer.retran_path); + trans_pri = sctp_trans_elect_best(trans_pri, trans_pf); + trans_sec = asoc->peer.primary_path; + } + + /* Set the active and retran transports. */ + asoc->peer.active_path = trans_pri; + asoc->peer.retran_path = trans_sec; +} + struct sctp_transport * sctp_assoc_choose_alter_transport(struct sctp_association *asoc, struct sctp_transport *last_sent_to) @@ -1547,7 +1592,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, /* Set an association id for a given association */ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { - bool preload = gfp & __GFP_WAIT; + bool preload = !!(gfp & __GFP_WAIT); int ret; /* If the id is already assigned, keep it. */ diff --git a/net/sctp/command.c b/net/sctp/command.c deleted file mode 100644 index dd7375851618..000000000000 --- a/net/sctp/command.c +++ /dev/null @@ -1,68 +0,0 @@ -/* SCTP kernel implementation Copyright (C) 1999-2001 - * Cisco, Motorola, and IBM - * Copyright 2001 La Monte H.P. Yarroll - * - * This file is part of the SCTP kernel implementation - * - * These functions manipulate sctp command sequences. - * - * This SCTP implementation is free software; - * you can redistribute it and/or modify it under the terms of - * the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This SCTP implementation is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * ************************ - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. - * - * Please send any bug reports or fixes you make to the - * email address(es): - * lksctp developers <linux-sctp@vger.kernel.org> - * - * Written or modified by: - * La Monte H.P. Yarroll <piggy@acm.org> - * Karl Knutson <karl@athena.chicago.il.us> - */ - -#include <linux/types.h> -#include <net/sctp/sctp.h> -#include <net/sctp/sm.h> - -/* Initialize a block of memory as a command sequence. */ -int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) -{ - memset(seq, 0, sizeof(sctp_cmd_seq_t)); - return 1; /* We always succeed. */ -} - -/* Add a command to a sctp_cmd_seq_t. - * Return 0 if the command sequence is full. - */ -void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, sctp_arg_t obj) -{ - BUG_ON(seq->next_free_slot >= SCTP_MAX_NUM_COMMANDS); - - seq->cmds[seq->next_free_slot].verb = verb; - seq->cmds[seq->next_free_slot++].obj = obj; -} - -/* Return the next command structure in a sctp_cmd_seq. - * Returns NULL at the end of the sequence. - */ -sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq) -{ - sctp_cmd_t *retval = NULL; - - if (seq->next_cmd < seq->next_free_slot) - retval = &seq->cmds[seq->next_cmd++]; - - return retval; -} - diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3d9f429858dc..9da76ba4d10f 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -481,7 +481,7 @@ normal: } if (chunk->transport) - chunk->transport->last_time_heard = jiffies; + chunk->transport->last_time_heard = ktime_get(); error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); diff --git a/net/sctp/input.c b/net/sctp/input.c index f2e2cbd2d750..c1b991294516 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -575,11 +575,6 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) int err; struct net *net = dev_net(skb->dev); - if (skb->len < ihlen + 8) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); - return; - } - /* Fix up skb to look at the embedded net header. */ saveip = skb->network_header; savesctp = skb->transport_header; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2b1738ef9394..0e4198ee2370 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -216,7 +216,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) IP6_ECN_flow_xmit(sk, fl6->flowlabel); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) - skb->local_df = 1; + skb->ignore_df = 1; SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); @@ -434,7 +434,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { - if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) { + if (addr->sa.sa_family == AF_INET) { sk->sk_v6_rcv_saddr.s6_addr32[0] = 0; sk->sk_v6_rcv_saddr.s6_addr32[1] = 0; sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); @@ -448,7 +448,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) /* Initialize sk->sk_daddr from sctp_addr. */ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { - if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) { + if (addr->sa.sa_family == AF_INET) { sk->sk_v6_daddr.s6_addr32[0] = 0; sk->sk_v6_daddr.s6_addr32[1] = 0; sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff); @@ -556,8 +556,6 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) if (IPV6_ADDR_ANY == type) return 1; if (type == IPV6_ADDR_MAPPED) { - if (sp && !sp->v4mapped) - return 0; if (sp && ipv6_only_sock(sctp_opt2sk(sp))) return 0; sctp_v6_map_v4(addr); @@ -587,8 +585,6 @@ static int sctp_v6_addr_valid(union sctp_addr *addr, /* Note: This routine is used in input, so v4-mapped-v6 * are disallowed here when there is no sctp_sock. */ - if (!sp || !sp->v4mapped) - return 0; if (sp && ipv6_only_sock(sctp_opt2sk(sp))) return 0; sctp_v6_map_v4(addr); @@ -675,11 +671,23 @@ out: return newsk; } -/* Map v4 address to mapped v6 address */ -static void sctp_v6_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr) +/* Format a sockaddr for return to user space. This makes sure the return is + * AF_INET or AF_INET6 depending on the SCTP_I_WANT_MAPPED_V4_ADDR option. + */ +static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { - if (sp->v4mapped && AF_INET == addr->sa.sa_family) - sctp_v4_map_v6(addr); + if (sp->v4mapped) { + if (addr->sa.sa_family == AF_INET) + sctp_v4_map_v6(addr); + } else { + if (addr->sa.sa_family == AF_INET6 && + ipv6_addr_v4mapped(&addr->v6.sin6_addr)) + sctp_v6_map_v4(addr); + } + + if (addr->sa.sa_family == AF_INET) + return sizeof(struct sockaddr_in); + return sizeof(struct sockaddr_in6); } /* Where did this skb come from? */ @@ -706,82 +714,68 @@ static void sctp_v6_ecn_capable(struct sock *sk) inet6_sk(sk)->tclass |= INET_ECN_ECT_0; } -/* Initialize a PF_INET6 socket msg_name. */ -static void sctp_inet6_msgname(char *msgname, int *addr_len) -{ - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *)msgname; - sin6->sin6_family = AF_INET6; - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; /*FIXME */ - *addr_len = sizeof(struct sockaddr_in6); -} - /* Initialize a PF_INET msgname from a ulpevent. */ static void sctp_inet6_event_msgname(struct sctp_ulpevent *event, char *msgname, int *addrlen) { - struct sockaddr_in6 *sin6, *sin6from; - - if (msgname) { - union sctp_addr *addr; - struct sctp_association *asoc; - - asoc = event->asoc; - sctp_inet6_msgname(msgname, addrlen); - sin6 = (struct sockaddr_in6 *)msgname; - sin6->sin6_port = htons(asoc->peer.port); - addr = &asoc->peer.primary_addr; + union sctp_addr *addr; + struct sctp_association *asoc; + union sctp_addr *paddr; - /* Note: If we go to a common v6 format, this code - * will change. - */ + if (!msgname) + return; - /* Map ipv4 address into v4-mapped-on-v6 address. */ - if (sctp_sk(asoc->base.sk)->v4mapped && - AF_INET == addr->sa.sa_family) { - sctp_v4_map_v6((union sctp_addr *)sin6); - sin6->sin6_addr.s6_addr32[3] = - addr->v4.sin_addr.s_addr; - return; - } + addr = (union sctp_addr *)msgname; + asoc = event->asoc; + paddr = &asoc->peer.primary_addr; - sin6from = &asoc->peer.primary_addr.v6; - sin6->sin6_addr = sin6from->sin6_addr; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = sin6from->sin6_scope_id; + if (paddr->sa.sa_family == AF_INET) { + addr->v4.sin_family = AF_INET; + addr->v4.sin_port = htons(asoc->peer.port); + addr->v4.sin_addr = paddr->v4.sin_addr; + } else { + addr->v6.sin6_family = AF_INET6; + addr->v6.sin6_flowinfo = 0; + if (ipv6_addr_type(&paddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) + addr->v6.sin6_scope_id = paddr->v6.sin6_scope_id; + else + addr->v6.sin6_scope_id = 0; + addr->v6.sin6_port = htons(asoc->peer.port); + addr->v6.sin6_addr = paddr->v6.sin6_addr; } + + *addrlen = sctp_v6_addr_to_user(sctp_sk(asoc->base.sk), addr); } /* Initialize a msg_name from an inbound skb. */ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname, int *addr_len) { + union sctp_addr *addr; struct sctphdr *sh; - struct sockaddr_in6 *sin6; - - if (msgname) { - sctp_inet6_msgname(msgname, addr_len); - sin6 = (struct sockaddr_in6 *)msgname; - sh = sctp_hdr(skb); - sin6->sin6_port = sh->source; - - /* Map ipv4 address into v4-mapped-on-v6 address. */ - if (sctp_sk(skb->sk)->v4mapped && - ip_hdr(skb)->version == 4) { - sctp_v4_map_v6((union sctp_addr *)sin6); - sin6->sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr; - return; - } - /* Otherwise, just copy the v6 address. */ - sin6->sin6_addr = ipv6_hdr(skb)->saddr; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) { + if (!msgname) + return; + + addr = (union sctp_addr *)msgname; + sh = sctp_hdr(skb); + + if (ip_hdr(skb)->version == 4) { + addr->v4.sin_family = AF_INET; + addr->v4.sin_port = sh->source; + addr->v4.sin_addr.s_addr = ip_hdr(skb)->saddr; + } else { + addr->v6.sin6_family = AF_INET6; + addr->v6.sin6_flowinfo = 0; + addr->v6.sin6_port = sh->source; + addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; + if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) { struct sctp_ulpevent *ev = sctp_skb2event(skb); - sin6->sin6_scope_id = ev->iif; + addr->v6.sin6_scope_id = ev->iif; } } + + *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr); } /* Do we support this AF? */ @@ -857,9 +851,6 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) return 0; } rcu_read_unlock(); - } else if (type == IPV6_ADDR_MAPPED) { - if (!opt->v4mapped) - return 0; } af = opt->pf->af; @@ -914,6 +905,23 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, return 1; } +/* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */ +static int sctp_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + int rc; + + rc = inet6_getname(sock, uaddr, uaddr_len, peer); + + if (rc != 0) + return rc; + + *uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk), + (union sctp_addr *)uaddr); + + return rc; +} + static const struct proto_ops inet6_seqpacket_ops = { .family = PF_INET6, .owner = THIS_MODULE, @@ -922,7 +930,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, - .getname = inet6_getname, + .getname = sctp_getname, .poll = sctp_poll, .ioctl = inet6_ioctl, .listen = sctp_inet_listen, @@ -943,7 +951,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; static struct inet_protosw sctpv6_stream_protosw = { @@ -951,7 +958,6 @@ static struct inet_protosw sctpv6_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG, }; @@ -976,8 +982,6 @@ static struct sctp_af sctp_af_inet6 = { .copy_addrlist = sctp_v6_copy_addrlist, .from_skb = sctp_v6_from_skb, .from_sk = sctp_v6_from_sk, - .to_sk_saddr = sctp_v6_to_sk_saddr, - .to_sk_daddr = sctp_v6_to_sk_daddr, .from_addr_param = sctp_v6_from_addr_param, .to_addr_param = sctp_v6_to_addr_param, .cmp_addr = sctp_v6_cmp_addr, @@ -1007,7 +1011,9 @@ static struct sctp_pf sctp_pf_inet6 = { .send_verify = sctp_inet6_send_verify, .supported_addrs = sctp_inet6_supported_addrs, .create_accept_sk = sctp_v6_create_accept_sk, - .addr_v4map = sctp_v6_addr_v4map, + .addr_to_user = sctp_v6_addr_to_user, + .to_sk_saddr = sctp_v6_to_sk_saddr, + .to_sk_daddr = sctp_v6_to_sk_daddr, .af = &sctp_af_inet6, }; diff --git a/net/sctp/output.c b/net/sctp/output.c index 0f4d15fc2627..42dffd428389 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -178,7 +178,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, case SCTP_XMIT_RWND_FULL: case SCTP_XMIT_OK: - case SCTP_XMIT_NAGLE_DELAY: + case SCTP_XMIT_DELAY: break; } @@ -591,7 +591,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); - nskb->local_df = packet->ipfragok; + nskb->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(nskb, tp); out: @@ -599,7 +599,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the @@ -633,7 +633,6 @@ nomem: static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, struct sctp_chunk *chunk) { - sctp_xmit_t retval = SCTP_XMIT_OK; size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; struct sctp_association *asoc = transport->asoc; @@ -658,15 +657,11 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, datasize = sctp_data_size(chunk); - if (datasize > rwnd) { - if (inflight > 0) { - /* We have (at least) one data chunk in flight, - * so we can't fall back to rule 6.1 B). - */ - retval = SCTP_XMIT_RWND_FULL; - goto finish; - } - } + if (datasize > rwnd && inflight > 0) + /* We have (at least) one data chunk in flight, + * so we can't fall back to rule 6.1 B). + */ + return SCTP_XMIT_RWND_FULL; /* RFC 2960 6.1 Transmission of DATA Chunks * @@ -680,36 +675,44 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, * When a Fast Retransmit is being performed the sender SHOULD * ignore the value of cwnd and SHOULD NOT delay retransmission. */ - if (chunk->fast_retransmit != SCTP_NEED_FRTX) - if (flight_size >= transport->cwnd) { - retval = SCTP_XMIT_RWND_FULL; - goto finish; - } + if (chunk->fast_retransmit != SCTP_NEED_FRTX && + flight_size >= transport->cwnd) + return SCTP_XMIT_RWND_FULL; /* Nagle's algorithm to solve small-packet problem: * Inhibit the sending of new chunks when new outgoing data arrives * if any previously transmitted data on the connection remains * unacknowledged. */ - if (!sctp_sk(asoc->base.sk)->nodelay && sctp_packet_empty(packet) && - inflight && sctp_state(asoc, ESTABLISHED)) { - unsigned int max = transport->pathmtu - packet->overhead; - unsigned int len = chunk->skb->len + q->out_qlen; - - /* Check whether this chunk and all the rest of pending - * data will fit or delay in hopes of bundling a full - * sized packet. - * Don't delay large message writes that may have been - * fragmeneted into small peices. - */ - if ((len < max) && chunk->msg->can_delay) { - retval = SCTP_XMIT_NAGLE_DELAY; - goto finish; - } - } -finish: - return retval; + if (sctp_sk(asoc->base.sk)->nodelay) + /* Nagle disabled */ + return SCTP_XMIT_OK; + + if (!sctp_packet_empty(packet)) + /* Append to packet */ + return SCTP_XMIT_OK; + + if (inflight == 0) + /* Nothing unacked */ + return SCTP_XMIT_OK; + + if (!sctp_state(asoc, ESTABLISHED)) + return SCTP_XMIT_OK; + + /* Check whether this chunk and all the rest of pending data will fit + * or delay in hopes of bundling a full sized packet. + */ + if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead) + /* Enough data queued to fill a packet */ + return SCTP_XMIT_OK; + + /* Don't delay large message writes that may have been fragmented */ + if (!chunk->msg->can_delay) + return SCTP_XMIT_OK; + + /* Defer until all data acked or packet full */ + return SCTP_XMIT_DELAY; } /* This private function does management things when adding DATA chunk */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 9c77947c0597..7e8f0a117106 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -629,7 +629,7 @@ redo: done = 1; break; - case SCTP_XMIT_NAGLE_DELAY: + case SCTP_XMIT_DELAY: /* Send this packet. */ error = sctp_packet_transmit(pkt); @@ -1015,7 +1015,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) switch (status) { case SCTP_XMIT_PMTU_FULL: case SCTP_XMIT_RWND_FULL: - case SCTP_XMIT_NAGLE_DELAY: + case SCTP_XMIT_DELAY: /* We could not append this chunk, so put * the chunk back on the output queue. */ @@ -1025,7 +1025,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_outq_head_data(q, chunk); goto sctp_flush_out; - break; case SCTP_XMIT_OK: /* The sender is in the SHUTDOWN-PENDING state, diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0947f1e15eb8..34229ee7f379 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -78,7 +78,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void __percpu **)net->sctp.sctp_statistics, + snmp_fold_field(net->sctp.sctp_statistics, sctp_snmp_list[i].entry)); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index c09757fbf803..6240834f4b95 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -491,8 +491,13 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, continue; if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { - fl4->saddr = laddr->a.v4.sin_addr.s_addr; fl4->fl4_sport = laddr->a.v4.sin_port; + flowi4_update_output(fl4, + asoc->base.sk->sk_bound_dev_if, + RT_CONN_FLAGS(asoc->base.sk), + daddr->v4.sin_addr.s_addr, + laddr->a.v4.sin_addr.s_addr); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) { dst = &rt->dst; @@ -571,10 +576,10 @@ out: return newsk; } -/* Map address, empty for v4 family */ -static void sctp_v4_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr) +static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { - /* Empty */ + /* No address mapping for V4 sockets */ + return sizeof(struct sockaddr_in); } /* Dump the v4 addr to the seq file. */ @@ -971,7 +976,9 @@ static struct sctp_pf sctp_pf_inet = { .send_verify = sctp_inet_send_verify, .supported_addrs = sctp_inet_supported_addrs, .create_accept_sk = sctp_v4_create_accept_sk, - .addr_v4map = sctp_v4_addr_v4map, + .addr_to_user = sctp_v4_addr_to_user, + .to_sk_saddr = sctp_v4_to_sk_saddr, + .to_sk_daddr = sctp_v4_to_sk_daddr, .af = &sctp_af_inet }; @@ -1012,7 +1019,6 @@ static struct inet_protosw sctp_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; static struct inet_protosw sctp_stream_protosw = { @@ -1020,7 +1026,6 @@ static struct inet_protosw sctp_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -1044,8 +1049,6 @@ static struct sctp_af sctp_af_inet = { .copy_addrlist = sctp_v4_copy_addrlist, .from_skb = sctp_v4_from_skb, .from_sk = sctp_v4_from_sk, - .to_sk_saddr = sctp_v4_to_sk_saddr, - .to_sk_daddr = sctp_v4_to_sk_daddr, .from_addr_param = sctp_v4_from_addr_param, .to_addr_param = sctp_v4_to_addr_param, .cmp_addr = sctp_v4_cmp_addr, @@ -1100,14 +1103,15 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static inline int init_sctp_mibs(struct net *net) { - return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics, - sizeof(struct sctp_mib), - __alignof__(struct sctp_mib)); + net->sctp.sctp_statistics = alloc_percpu(struct sctp_mib); + if (!net->sctp.sctp_statistics) + return -ENOMEM; + return 0; } static inline void cleanup_sctp_mibs(struct net *net) { - snmp_mib_free((void __percpu **)net->sctp.sctp_statistics); + free_percpu(net->sctp.sctp_statistics); } static void sctp_v4_pf_init(void) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fee5552ddf92..ae0e616a7ca5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1782,7 +1782,7 @@ no_hmac: else kt = ktime_get(); - if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) { + if (!asoc && ktime_before(bear_cookie->expiration, kt)) { /* * Section 3.3.10.3 Stale Cookie Error (3) * diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 5d6883ff00c3..fef2acdf4a2e 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -496,11 +496,10 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, /* If the transport error count is greater than the pf_retrans * threshold, and less than pathmaxrtx, and if the current state - * is not SCTP_UNCONFIRMED, then mark this transport as Partially - * Failed, see SCTP Quick Failover Draft, section 5.1 + * is SCTP_ACTIVE, then mark this transport as Partially Failed, + * see SCTP Quick Failover Draft, section 5.1 */ - if ((transport->state != SCTP_PF) && - (transport->state != SCTP_UNCONFIRMED) && + if ((transport->state == SCTP_ACTIVE) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5170a1ff95a1..d3f1ea460c50 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4182,7 +4182,6 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, case SCTP_CID_ACTION_DISCARD: /* Discard the packet. */ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - break; case SCTP_CID_ACTION_DISCARD_ERR: /* Generate an ERROR chunk as response. */ hdr = unk_chunk->chunk_hdr; @@ -4198,11 +4197,9 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, /* Discard the packet. */ sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; - break; case SCTP_CID_ACTION_SKIP: /* Skip the chunk. */ return SCTP_DISPOSITION_DISCARD; - break; case SCTP_CID_ACTION_SKIP_ERR: /* Generate an ERROR chunk as response. */ hdr = unk_chunk->chunk_hdr; @@ -4216,7 +4213,6 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, } /* Skip the chunk. */ return SCTP_DISPOSITION_CONSUME; - break; default: break; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index fee06b99a4da..eb71d49e7653 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -71,6 +71,7 @@ #include <net/route.h> #include <net/ipv6.h> #include <net/inet_common.h> +#include <net/busy_poll.h> #include <linux/socket.h> /* for sa_family_t */ #include <linux/export.h> @@ -253,7 +254,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, if (id_asoc && (id_asoc != addr_asoc)) return NULL; - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), + sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk), (union sctp_addr *)addr); return transport; @@ -395,7 +396,7 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Copy back into socket for getsockname() use. */ if (!ret) { inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); - af->to_sk_saddr(addr, sk); + sp->pf->to_sk_saddr(addr, sk); } return ret; @@ -1052,7 +1053,6 @@ static int __sctp_connect(struct sock *sk, struct sctp_association *asoc2; struct sctp_transport *transport; union sctp_addr to; - struct sctp_af *af; sctp_scope_t scope; long timeo; int err = 0; @@ -1080,6 +1080,8 @@ static int __sctp_connect(struct sock *sk, /* Walk through the addrs buffer and count the number of addresses. */ addr_buf = kaddrs; while (walk_size < addrs_size) { + struct sctp_af *af; + if (walk_size + sizeof(sa_family_t) > addrs_size) { err = -EINVAL; goto out_free; @@ -1204,8 +1206,7 @@ static int __sctp_connect(struct sock *sk, /* Initialize sk's dport and daddr for getpeername() */ inet_sk(sk)->inet_dport = htons(asoc->peer.port); - af = sctp_get_af_specific(sa_addr->sa.sa_family); - af->to_sk_daddr(sa_addr, sk); + sp->pf->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; /* in-kernel sockets don't generally have a file allocated to them @@ -1601,12 +1602,13 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct sctp_initmsg *sinit; sctp_assoc_t associd = 0; sctp_cmsgs_t cmsgs = { NULL }; - int err; sctp_scope_t scope; - long timeo; - __u16 sinfo_flags = 0; + bool fill_sinfo_ttl = false; struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; + __u16 sinfo_flags = 0; + long timeo; + int err; err = 0; sp = sctp_sk(sk); @@ -1647,10 +1649,21 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, msg_name = msg->msg_name; } - sinfo = cmsgs.info; sinit = cmsgs.init; + if (cmsgs.sinfo != NULL) { + memset(&default_sinfo, 0, sizeof(default_sinfo)); + default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid; + default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags; + default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid; + default_sinfo.sinfo_context = cmsgs.sinfo->snd_context; + default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id; - /* Did the user specify SNDRCVINFO? */ + sinfo = &default_sinfo; + fill_sinfo_ttl = true; + } else { + sinfo = cmsgs.srinfo; + } + /* Did the user specify SNDINFO/SNDRCVINFO? */ if (sinfo) { sinfo_flags = sinfo->sinfo_flags; associd = sinfo->sinfo_assoc_id; @@ -1857,8 +1870,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, pr_debug("%s: we have a valid association\n", __func__); if (!sinfo) { - /* If the user didn't specify SNDRCVINFO, make up one with - * some defaults. + /* If the user didn't specify SNDINFO/SNDRCVINFO, make up + * one with some defaults. */ memset(&default_sinfo, 0, sizeof(default_sinfo)); default_sinfo.sinfo_stream = asoc->default_stream; @@ -1867,7 +1880,13 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, default_sinfo.sinfo_context = asoc->default_context; default_sinfo.sinfo_timetolive = asoc->default_timetolive; default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc); + sinfo = &default_sinfo; + } else if (fill_sinfo_ttl) { + /* In case SNDINFO was specified, we still need to fill + * it with a default ttl from the assoc here. + */ + sinfo->sinfo_timetolive = asoc->default_timetolive; } /* API 7.1.7, the sndbuf size per association bounds the @@ -2041,8 +2060,6 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * flags - flags sent or received with the user message, see Section * 5 for complete description of the flags. */ -static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); - static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) @@ -2093,9 +2110,16 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, sp->pf->skb_msgname(skb, msg->msg_name, addr_len); } + /* Check if we allow SCTP_NXTINFO. */ + if (sp->recvnxtinfo) + sctp_ulpevent_read_nxtinfo(event, msg, sk); + /* Check if we allow SCTP_RCVINFO. */ + if (sp->recvrcvinfo) + sctp_ulpevent_read_rcvinfo(event, msg); /* Check if we allow SCTP_SNDRCVINFO. */ if (sp->subscribe.sctp_data_io_event) sctp_ulpevent_read_sndrcvinfo(event, msg); + #if 0 /* FIXME: we should be calling IP/IPv6 layers. */ if (sk->sk_protinfo.af_inet.cmsg_flags) @@ -2181,8 +2205,13 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; - /* - * At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, + if (sctp_sk(sk)->subscribe.sctp_data_io_event) + pr_warn_ratelimited(DEPRECATED "%s (pid %d) " + "Requested SCTP_SNDRCVINFO event.\n" + "Use SCTP_RCVINFO through SCTP_RECVRCVINFO option instead.\n", + current->comm, task_pid_nr(current)); + + /* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, * if there is no data to be sent or retransmit, the stack will * immediately send up this notification. */ @@ -2746,19 +2775,22 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, char __user *optval, unsigned int optlen) { - struct sctp_sndrcvinfo info; - struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sndrcvinfo info; - if (optlen != sizeof(struct sctp_sndrcvinfo)) + if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; + if (info.sinfo_flags & + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_ABORT | SCTP_EOF)) + return -EINVAL; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - if (asoc) { asoc->default_stream = info.sinfo_stream; asoc->default_flags = info.sinfo_flags; @@ -2776,6 +2808,44 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, return 0; } +/* RFC6458, Section 8.1.31. Set/get Default Send Parameters + * (SCTP_DEFAULT_SNDINFO) + */ +static int sctp_setsockopt_default_sndinfo(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sndinfo info; + + if (optlen != sizeof(info)) + return -EINVAL; + if (copy_from_user(&info, optval, optlen)) + return -EFAULT; + if (info.snd_flags & + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_ABORT | SCTP_EOF)) + return -EINVAL; + + asoc = sctp_id2assoc(sk, info.snd_assoc_id); + if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + if (asoc) { + asoc->default_stream = info.snd_sid; + asoc->default_flags = info.snd_flags; + asoc->default_ppid = info.snd_ppid; + asoc->default_context = info.snd_context; + } else { + sp->default_stream = info.snd_sid; + sp->default_flags = info.snd_flags; + sp->default_ppid = info.snd_ppid; + sp->default_context = info.snd_context; + } + + return 0; +} + /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) * * Requests that the local SCTP stack use the enclosed peer address as @@ -3522,7 +3592,6 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, return 0; } - /* * SCTP_PEER_ADDR_THLDS * @@ -3573,6 +3642,38 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, return 0; } +static int sctp_setsockopt_recvrcvinfo(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + int val; + + if (optlen < sizeof(int)) + return -EINVAL; + if (get_user(val, (int __user *) optval)) + return -EFAULT; + + sctp_sk(sk)->recvrcvinfo = (val == 0) ? 0 : 1; + + return 0; +} + +static int sctp_setsockopt_recvnxtinfo(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + int val; + + if (optlen < sizeof(int)) + return -EINVAL; + if (get_user(val, (int __user *) optval)) + return -EFAULT; + + sctp_sk(sk)->recvnxtinfo = (val == 0) ? 0 : 1; + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3670,6 +3771,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_default_send_param(sk, optval, optlen); break; + case SCTP_DEFAULT_SNDINFO: + retval = sctp_setsockopt_default_sndinfo(sk, optval, optlen); + break; case SCTP_PRIMARY_ADDR: retval = sctp_setsockopt_primary_addr(sk, optval, optlen); break; @@ -3724,6 +3828,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_PEER_ADDR_THLDS: retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen); break; + case SCTP_RECVRCVINFO: + retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen); + break; + case SCTP_RECVNXTINFO: + retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -3970,6 +4080,9 @@ static int sctp_init_sock(struct sock *sk) /* Enable Nagle algorithm by default. */ sp->nodelay = 0; + sp->recvrcvinfo = 0; + sp->recvnxtinfo = 0; + /* Enable by default. */ sp->v4mapped = 1; @@ -4142,7 +4255,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, transport->af_specific->sockaddr_len); /* Map ipv4 address into v4-mapped-on-v6 address. */ - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), + sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk), (union sctp_addr *)&status.sstat_primary.spinfo_address); status.sstat_primary.spinfo_state = transport->state; status.sstat_primary.spinfo_cwnd = transport->cwnd; @@ -4300,8 +4413,8 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) { struct sctp_association *asoc = sctp_id2assoc(sk, id); + struct sctp_sock *sp = sctp_sk(sk); struct socket *sock; - struct sctp_af *af; int err = 0; if (!asoc) @@ -4323,8 +4436,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) /* Make peeled-off sockets more like 1-1 accepted sockets. * Set the daddr and initialize id to something more random */ - af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family); - af->to_sk_daddr(&asoc->peer.primary_addr, sk); + sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk); /* Populate the fields of the newsk from the oldsk and migrate the * asoc to the newsk. @@ -4708,8 +4820,8 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { memcpy(&temp, &from->ipaddr, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; + addrlen = sctp_get_pf_specific(sk->sk_family) + ->addr_to_user(sp, &temp); if (space_left < addrlen) return -ENOMEM; if (copy_to_user(to, &temp, addrlen)) @@ -4753,9 +4865,9 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, if (!temp.v4.sin_port) temp.v4.sin_port = htons(port); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), - &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; + addrlen = sctp_get_pf_specific(sk->sk_family) + ->addr_to_user(sctp_sk(sk), &temp); + if (space_left < addrlen) { cnt = -ENOMEM; break; @@ -4843,8 +4955,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, */ list_for_each_entry(addr, &bp->address_list, list) { memcpy(&temp, &addr->a, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; + addrlen = sctp_get_pf_specific(sk->sk_family) + ->addr_to_user(sp, &temp); if (space_left < addrlen) { err = -ENOMEM; /*fixme: right error?*/ goto out; @@ -4903,7 +5015,7 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr, asoc->peer.primary_path->af_specific->sockaddr_len); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, + sctp_get_pf_specific(sk->sk_family)->addr_to_user(sp, (union sctp_addr *)&prim.ssp_addr); if (put_user(len, optlen)) @@ -4963,14 +5075,14 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_sndrcvinfo info; - struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sndrcvinfo info; - if (len < sizeof(struct sctp_sndrcvinfo)) + if (len < sizeof(info)) return -EINVAL; - len = sizeof(struct sctp_sndrcvinfo); + len = sizeof(info); if (copy_from_user(&info, optval, len)) return -EFAULT; @@ -4978,7 +5090,6 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - if (asoc) { info.sinfo_stream = asoc->default_stream; info.sinfo_flags = asoc->default_flags; @@ -5001,6 +5112,48 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, return 0; } +/* RFC6458, Section 8.1.31. Set/get Default Send Parameters + * (SCTP_DEFAULT_SNDINFO) + */ +static int sctp_getsockopt_default_sndinfo(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sndinfo info; + + if (len < sizeof(info)) + return -EINVAL; + + len = sizeof(info); + + if (copy_from_user(&info, optval, len)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, info.snd_assoc_id); + if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + if (asoc) { + info.snd_sid = asoc->default_stream; + info.snd_flags = asoc->default_flags; + info.snd_ppid = asoc->default_ppid; + info.snd_context = asoc->default_context; + } else { + info.snd_sid = sp->default_stream; + info.snd_flags = sp->default_flags; + info.snd_ppid = sp->default_ppid; + info.snd_context = sp->default_context; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &info, len)) + return -EFAULT; + + return 0; +} + /* * * 7.1.5 SCTP_NODELAY @@ -5751,6 +5904,46 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, return 0; } +static int sctp_getsockopt_recvrcvinfo(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + int val = 0; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + if (sctp_sk(sk)->recvrcvinfo) + val = 1; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + +static int sctp_getsockopt_recvnxtinfo(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + int val = 0; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + if (sctp_sk(sk)->recvnxtinfo) + val = 1; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -5820,6 +6013,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_default_send_param(sk, len, optval, optlen); break; + case SCTP_DEFAULT_SNDINFO: + retval = sctp_getsockopt_default_sndinfo(sk, len, + optval, optlen); + break; case SCTP_PRIMARY_ADDR: retval = sctp_getsockopt_primary_addr(sk, len, optval, optlen); break; @@ -5894,6 +6091,12 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_GET_ASSOC_STATS: retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen); break; + case SCTP_RECVRCVINFO: + retval = sctp_getsockopt_recvrcvinfo(sk, len, optval, optlen); + break; + case SCTP_RECVNXTINFO: + retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -5945,8 +6148,9 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) /* Search for an available port. */ int low, high, remaining, index; unsigned int rover; + struct net *net = sock_net(sk); - inet_get_local_port_range(sock_net(sk), &low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; @@ -5954,7 +6158,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) rover++; if ((rover < low) || (rover > high)) rover = low; - if (inet_is_reserved_local_port(rover)) + if (inet_is_local_reserved_port(net, rover)) continue; index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; @@ -6388,8 +6592,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) struct cmsghdr *cmsg; struct msghdr *my_msg = (struct msghdr *)msg; - for (cmsg = CMSG_FIRSTHDR(msg); - cmsg != NULL; + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; cmsg = CMSG_NXTHDR(my_msg, cmsg)) { if (!CMSG_OK(my_msg, cmsg)) return -EINVAL; @@ -6402,7 +6605,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) switch (cmsg->cmsg_type) { case SCTP_INIT: /* SCTP Socket API Extension - * 5.2.1 SCTP Initiation Structure (SCTP_INIT) + * 5.3.1 SCTP Initiation Structure (SCTP_INIT) * * This cmsghdr structure provides information for * initializing new SCTP associations with sendmsg(). @@ -6414,15 +6617,15 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) * ------------ ------------ ---------------------- * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg */ - if (cmsg->cmsg_len != - CMSG_LEN(sizeof(struct sctp_initmsg))) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_initmsg))) return -EINVAL; - cmsgs->init = (struct sctp_initmsg *)CMSG_DATA(cmsg); + + cmsgs->init = CMSG_DATA(cmsg); break; case SCTP_SNDRCV: /* SCTP Socket API Extension - * 5.2.2 SCTP Header Information Structure(SCTP_SNDRCV) + * 5.3.2 SCTP Header Information Structure(SCTP_SNDRCV) * * This cmsghdr structure specifies SCTP options for * sendmsg() and describes SCTP header information @@ -6432,24 +6635,44 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) * ------------ ------------ ---------------------- * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo */ - if (cmsg->cmsg_len != - CMSG_LEN(sizeof(struct sctp_sndrcvinfo))) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndrcvinfo))) return -EINVAL; - cmsgs->info = - (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + cmsgs->srinfo = CMSG_DATA(cmsg); - /* Minimally, validate the sinfo_flags. */ - if (cmsgs->info->sinfo_flags & + if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; + case SCTP_SNDINFO: + /* SCTP Socket API Extension + * 5.3.4 SCTP Send Information Structure (SCTP_SNDINFO) + * + * This cmsghdr structure specifies SCTP options for + * sendmsg(). This structure and SCTP_RCVINFO replaces + * SCTP_SNDRCV which has been deprecated. + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_SNDINFO struct sctp_sndinfo + */ + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndinfo))) + return -EINVAL; + + cmsgs->sinfo = CMSG_DATA(cmsg); + + if (cmsgs->sinfo->snd_flags & + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_ABORT | SCTP_EOF)) + return -EINVAL; + break; default: return -EINVAL; } } + return 0; } @@ -6516,8 +6739,8 @@ out: * Note: This is pretty much the same routine as in core/datagram.c * with a few changes to make lksctp work. */ -static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, - int noblock, int *err) +struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, + int noblock, int *err) { int error; struct sk_buff *skb; @@ -6557,6 +6780,10 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk->sk_shutdown & RCV_SHUTDOWN) break; + if (sk_can_busy_loop(sk) && + sk_busy_loop(sk, noblock)) + continue; + /* User doesn't want to wait. */ error = -EAGAIN; if (!timeo) @@ -6940,7 +7167,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; newsk->sk_flags = sk->sk_flags; - newsk->sk_no_check = sk->sk_no_check; + newsk->sk_no_check_tx = sk->sk_no_check_tx; + newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; newsk->sk_shutdown = sk->sk_shutdown; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index c82fdc1eab7c..2e9ada10fd84 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -34,6 +34,8 @@ * Sridhar Samudrala <sri@us.ibm.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <linux/sysctl.h> @@ -46,6 +48,11 @@ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ static int rwnd_scale_max = 16; +static int rto_alpha_min = 0; +static int rto_beta_min = 0; +static int rto_alpha_max = 1000; +static int rto_beta_max = 1000; + static unsigned long max_autoclose_min = 0; static unsigned long max_autoclose_max = (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) @@ -64,6 +71,9 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); static int proc_sctp_do_auth(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -126,15 +136,19 @@ static struct ctl_table sctp_net_table[] = { .procname = "rto_alpha_exp_divisor", .data = &init_net.sctp.rto_alpha, .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, + .mode = 0644, + .proc_handler = proc_sctp_do_alpha_beta, + .extra1 = &rto_alpha_min, + .extra2 = &rto_alpha_max, }, { .procname = "rto_beta_exp_divisor", .data = &init_net.sctp.rto_beta, .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, + .mode = 0644, + .proc_handler = proc_sctp_do_alpha_beta, + .extra1 = &rto_beta_min, + .extra2 = &rto_beta_max, }, { .procname = "max_burst", @@ -307,41 +321,40 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; - char tmp[8]; struct ctl_table tbl; - int ret; - int changed = 0; + bool changed = false; char *none = "none"; + char tmp[8]; + int ret; memset(&tbl, 0, sizeof(struct ctl_table)); if (write) { tbl.data = tmp; - tbl.maxlen = 8; + tbl.maxlen = sizeof(tmp); } else { tbl.data = net->sctp.sctp_hmac_alg ? : none; tbl.maxlen = strlen(tbl.data); } - ret = proc_dostring(&tbl, write, buffer, lenp, ppos); - if (write) { + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { #ifdef CONFIG_CRYPTO_MD5 if (!strncmp(tmp, "md5", 3)) { net->sctp.sctp_hmac_alg = "md5"; - changed = 1; + changed = true; } #endif #ifdef CONFIG_CRYPTO_SHA1 if (!strncmp(tmp, "sha1", 4)) { net->sctp.sctp_hmac_alg = "sha1"; - changed = 1; + changed = true; } #endif if (!strncmp(tmp, "none", 4)) { net->sctp.sctp_hmac_alg = NULL; - changed = 1; + changed = true; } - if (!changed) ret = -EINVAL; } @@ -354,11 +367,10 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; - int new_value; - struct ctl_table tbl; unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; - int ret; + struct ctl_table tbl; + int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); @@ -367,12 +379,15 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, tbl.data = &new_value; else tbl.data = &net->sctp.rto_min; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); - if (write) { - if (ret || new_value > max || new_value < min) + if (write && ret == 0) { + if (new_value > max || new_value < min) return -EINVAL; + net->sctp.rto_min = new_value; } + return ret; } @@ -381,11 +396,10 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; - int new_value; - struct ctl_table tbl; unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; - int ret; + struct ctl_table tbl; + int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); @@ -394,15 +408,29 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, tbl.data = &new_value; else tbl.data = &net->sctp.rto_max; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); - if (write) { - if (ret || new_value > max || new_value < min) + if (write && ret == 0) { + if (new_value > max || new_value < min) return -EINVAL; + net->sctp.rto_max = new_value; } + return ret; } +static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (write) + pr_warn_once("Changing rto_alpha or rto_beta may lead to " + "suboptimal rtt/srtt estimations!\n"); + + return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); +} + static int proc_sctp_do_auth(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -420,8 +448,7 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write, tbl.data = &net->sctp.auth_enable; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); - - if (write) { + if (write && ret == 0) { struct sock *sk = net->sctp.ctl_sock; net->sctp.auth_enable = new_value; @@ -436,20 +463,21 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write, int sctp_sysctl_net_register(struct net *net) { - struct ctl_table *table = sctp_net_table; - - if (!net_eq(net, &init_net)) { - int i; + struct ctl_table *table; + int i; - table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); - if (!table) - return -ENOMEM; + table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); + if (!table) + return -ENOMEM; - for (i = 0; table[i].data; i++) - table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; - } + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); + if (net->sctp.sysctl_header == NULL) { + kfree(table); + return -ENOMEM; + } return 0; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1d348d15b33d..a0a431824f63 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net, */ peer->rto = msecs_to_jiffies(net->sctp.rto_initial); - peer->last_time_heard = jiffies; + peer->last_time_heard = ktime_get(); peer->last_time_ecne_reduced = jiffies; peer->param_flags = SPP_HB_DISABLE | @@ -289,8 +289,8 @@ void sctp_transport_route(struct sctp_transport *transport, */ if (asoc && (!asoc->peer.primary_path || (transport == asoc->peer.active_path))) - opt->pf->af->to_sk_saddr(&transport->saddr, - asoc->base.sk); + opt->pf->to_sk_saddr(&transport->saddr, + asoc->base.sk); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -594,15 +594,16 @@ void sctp_transport_burst_reset(struct sctp_transport *t) } /* What is the next timeout value for this transport? */ -unsigned long sctp_transport_timeout(struct sctp_transport *t) +unsigned long sctp_transport_timeout(struct sctp_transport *trans) { - unsigned long timeout; - timeout = t->rto + sctp_jitter(t->rto); - if ((t->state != SCTP_UNCONFIRMED) && - (t->state != SCTP_PF)) - timeout += t->hbinterval; - timeout += jiffies; - return timeout; + /* RTO + timer slack +/- 50% of RTO */ + unsigned long timeout = (trans->rto >> 1) + prandom_u32_max(trans->rto); + + if (trans->state != SCTP_UNCONFIRMED && + trans->state != SCTP_PF) + timeout += trans->hbinterval; + + return timeout + jiffies; } /* Reset transport variables to their initial values */ diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 85c64658bd0b..d1e38308f615 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -341,7 +341,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( memcpy(&spc->spc_aaddr, aaddr, sizeof(struct sockaddr_storage)); /* Map ipv4 address into v4-mapped-on-v6 address. */ - sctp_get_pf_specific(asoc->base.sk->sk_family)->addr_v4map( + sctp_get_pf_specific(asoc->base.sk->sk_family)->addr_to_user( sctp_sk(asoc->base.sk), (union sctp_addr *)&spc->spc_aaddr); @@ -366,9 +366,10 @@ fail: * specification [SCTP] and any extensions for a list of possible * error formats. */ -struct sctp_ulpevent *sctp_ulpevent_make_remote_error( - const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, gfp_t gfp) +struct sctp_ulpevent * +sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, + struct sctp_chunk *chunk, __u16 flags, + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_remote_error *sre; @@ -387,8 +388,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( /* Copy the skb to a new skb with room for us to prepend * notification with. */ - skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error), - 0, gfp); + skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp); /* Pull off the rest of the cause TLV from the chunk. */ skb_pull(chunk->skb, elen); @@ -399,62 +399,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( event = sctp_skb2event(skb); sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); - sre = (struct sctp_remote_error *) - skb_push(skb, sizeof(struct sctp_remote_error)); + sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre)); /* Trim the buffer to the right length. */ - skb_trim(skb, sizeof(struct sctp_remote_error) + elen); + skb_trim(skb, sizeof(*sre) + elen); - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_type: - * It should be SCTP_REMOTE_ERROR. - */ + /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */ + memset(sre, 0, sizeof(*sre)); sre->sre_type = SCTP_REMOTE_ERROR; - - /* - * Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_flags: 16 bits (unsigned integer) - * Currently unused. - */ sre->sre_flags = 0; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_length: sizeof (__u32) - * - * This field is the total length of the notification data, - * including the notification header. - */ sre->sre_length = skb->len; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_error: 16 bits (unsigned integer) - * This value represents one of the Operational Error causes defined in - * the SCTP specification, in network byte order. - */ sre->sre_error = cause; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_assoc_id: sizeof (sctp_assoc_t) - * - * The association id field, holds the identifier for the association. - * All notifications for a given association have the same association - * identifier. For TCP style socket, this field is ignored. - */ sctp_ulpevent_set_owner(event, asoc); sre->sre_assoc_id = sctp_assoc2id(asoc); return event; - fail: return NULL; } @@ -899,7 +858,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event) return notification->sn_header.sn_type; } -/* Copy out the sndrcvinfo into a msghdr. */ +/* RFC6458, Section 5.3.2. SCTP Header Information Structure + * (SCTP_SNDRCV, DEPRECATED) + */ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *msghdr) { @@ -908,74 +869,84 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, if (sctp_ulpevent_is_notification(event)) return; - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_stream: 16 bits (unsigned integer) - * - * For recvmsg() the SCTP stack places the message's stream number in - * this value. - */ + memset(&sinfo, 0, sizeof(sinfo)); sinfo.sinfo_stream = event->stream; - /* sinfo_ssn: 16 bits (unsigned integer) - * - * For recvmsg() this value contains the stream sequence number that - * the remote endpoint placed in the DATA chunk. For fragmented - * messages this is the same number for all deliveries of the message - * (if more than one recvmsg() is needed to read the message). - */ sinfo.sinfo_ssn = event->ssn; - /* sinfo_ppid: 32 bits (unsigned integer) - * - * In recvmsg() this value is - * the same information that was passed by the upper layer in the peer - * application. Please note that byte order issues are NOT accounted - * for and this information is passed opaquely by the SCTP stack from - * one end to the other. - */ sinfo.sinfo_ppid = event->ppid; - /* sinfo_flags: 16 bits (unsigned integer) - * - * This field may contain any of the following flags and is composed of - * a bitwise OR of these values. - * - * recvmsg() flags: - * - * SCTP_UNORDERED - This flag is present when the message was sent - * non-ordered. - */ sinfo.sinfo_flags = event->flags; - /* sinfo_tsn: 32 bit (unsigned integer) - * - * For the receiving side, this field holds a TSN that was - * assigned to one of the SCTP Data Chunks. - */ sinfo.sinfo_tsn = event->tsn; - /* sinfo_cumtsn: 32 bit (unsigned integer) - * - * This field will hold the current cumulative TSN as - * known by the underlying SCTP layer. Note this field is - * ignored when sending and only valid for a receive - * operation when sinfo_flags are set to SCTP_UNORDERED. - */ sinfo.sinfo_cumtsn = event->cumtsn; - /* sinfo_assoc_id: sizeof (sctp_assoc_t) - * - * The association handle field, sinfo_assoc_id, holds the identifier - * for the association announced in the COMMUNICATION_UP notification. - * All notifications for a given association have the same identifier. - * Ignored for one-to-one style sockets. - */ sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); - - /* context value that is set via SCTP_CONTEXT socket option. */ + /* Context value that is set via SCTP_CONTEXT socket option. */ sinfo.sinfo_context = event->asoc->default_rcv_context; - /* These fields are not used while receiving. */ sinfo.sinfo_timetolive = 0; put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, - sizeof(struct sctp_sndrcvinfo), (void *)&sinfo); + sizeof(sinfo), &sinfo); +} + +/* RFC6458, Section 5.3.5 SCTP Receive Information Structure + * (SCTP_SNDRCV) + */ +void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, + struct msghdr *msghdr) +{ + struct sctp_rcvinfo rinfo; + + if (sctp_ulpevent_is_notification(event)) + return; + + memset(&rinfo, 0, sizeof(struct sctp_rcvinfo)); + rinfo.rcv_sid = event->stream; + rinfo.rcv_ssn = event->ssn; + rinfo.rcv_ppid = event->ppid; + rinfo.rcv_flags = event->flags; + rinfo.rcv_tsn = event->tsn; + rinfo.rcv_cumtsn = event->cumtsn; + rinfo.rcv_assoc_id = sctp_assoc2id(event->asoc); + rinfo.rcv_context = event->asoc->default_rcv_context; + + put_cmsg(msghdr, IPPROTO_SCTP, SCTP_RCVINFO, + sizeof(rinfo), &rinfo); +} + +/* RFC6458, Section 5.3.6. SCTP Next Receive Information Structure + * (SCTP_NXTINFO) + */ +static void __sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, + struct msghdr *msghdr, + const struct sk_buff *skb) +{ + struct sctp_nxtinfo nxtinfo; + + memset(&nxtinfo, 0, sizeof(nxtinfo)); + nxtinfo.nxt_sid = event->stream; + nxtinfo.nxt_ppid = event->ppid; + nxtinfo.nxt_flags = event->flags; + if (sctp_ulpevent_is_notification(event)) + nxtinfo.nxt_flags |= SCTP_NOTIFICATION; + nxtinfo.nxt_length = skb->len; + nxtinfo.nxt_assoc_id = sctp_assoc2id(event->asoc); + + put_cmsg(msghdr, IPPROTO_SCTP, SCTP_NXTINFO, + sizeof(nxtinfo), &nxtinfo); +} + +void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, + struct msghdr *msghdr, + struct sock *sk) +{ + struct sk_buff *skb; + int err; + + skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err); + if (skb != NULL) { + __sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb), + msghdr, skb); + /* Just release refcount here. */ + kfree_skb(skb); + } } /* Do accounting for bytes received and hold a reference to the association diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 7144eb6a1b95..d49dc2ed30ad 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -38,6 +38,7 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <net/sock.h> +#include <net/busy_poll.h> #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> @@ -204,6 +205,9 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) goto out_free; + if (!sctp_ulpevent_is_notification(event)) + sk_mark_napi_id(sk, skb); + /* Check if the user wishes to receive this event. */ if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) goto out_free; diff --git a/net/socket.c b/net/socket.c index abf56b2a14f9..ae89569a2db5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -106,6 +106,7 @@ #include <linux/sockios.h> #include <linux/atalk.h> #include <net/busy_poll.h> +#include <linux/errqueue.h> #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; @@ -612,10 +613,15 @@ EXPORT_SYMBOL(sock_release); void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { *tx_flags = 0; - if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE) *tx_flags |= SKBTX_HW_TSTAMP; - if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) *tx_flags |= SKBTX_SW_TSTAMP; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) + *tx_flags |= SKBTX_SCHED_TSTAMP; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) + *tx_flags |= SKBTX_ACK_TSTAMP; + if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; } @@ -697,7 +703,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); - struct timespec ts[3]; + struct scm_timestamping tss; int empty = 1; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@ -714,28 +720,25 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); } else { - skb_get_timestampns(skb, &ts[0]); + struct timespec ts; + skb_get_timestampns(skb, &ts); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts[0]), &ts[0]); + sizeof(ts), &ts); } } - - memset(ts, 0, sizeof(ts)); - if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, ts + 0)) + memset(&tss, 0, sizeof(tss)); + if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE || + skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP) && + ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) + empty = 0; + if (shhwtstamps && + (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) empty = 0; - if (shhwtstamps) { - if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1)) - empty = 0; - if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) - empty = 0; - } if (!empty) put_cmsg(msg, SOL_SOCKET, - SCM_TIMESTAMPING, sizeof(ts), &ts); + SCM_TIMESTAMPING, sizeof(tss), &tss); } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 5285ead196c0..f77366717420 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -296,7 +296,7 @@ static void rpcauth_unhash_cred_locked(struct rpc_cred *cred) { hlist_del_rcu(&cred->cr_hash); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); } @@ -592,6 +592,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) put_group_info(acred.group_info); return ret; } +EXPORT_SYMBOL_GPL(rpcauth_lookupcred); void rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 36e431ee1c90..b6e440baccc3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -143,7 +143,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) gss_get_ctx(ctx); rcu_assign_pointer(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 27ce26240932..92d5ab99fbf3 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -218,10 +218,8 @@ static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor) spin_lock(®istered_mechs_lock); list_for_each_entry(pos, ®istered_mechs, gm_list) { - if (!mech_supports_pseudoflavor(pos, pseudoflavor)) { - module_put(pos->gm_owner); + if (!mech_supports_pseudoflavor(pos, pseudoflavor)) continue; - } if (try_module_get(pos->gm_owner)) gm = pos; break; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 0f73f4507746..4ce5eccec1f6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE; break; case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ @@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2; break; default: goto auth_err; diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 3513d559bc45..9761a0da964d 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -244,10 +244,10 @@ void xprt_free_bc_request(struct rpc_rqst *req) dprintk("RPC: free backchannel req=%p\n", req); req->rq_connect_cookie = xprt->connect_cookie - 1; - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); if (!xprt_need_to_requeue(xprt)) { /* diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ae333c1845bb..066362141133 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -374,7 +374,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) } return; out: - printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); + printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name); } EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 25578afe1548..9358c79fd589 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -250,7 +250,7 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) } EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); -static int rpc_wait_bit_killable(void *word) +static int rpc_wait_bit_killable(struct wait_bit_key *key) { if (fatal_signal_pending(current)) return -ERESTARTSYS; @@ -309,7 +309,7 @@ static int rpc_complete_task(struct rpc_task *task) * to enforce taking of the wq->lock and hence avoid races with * rpc_complete_task(). */ -int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) +int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action) { if (action == NULL) action = rpc_wait_bit_killable; @@ -832,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work) * @size: requested byte size * * To prevent rpciod from hanging, this allocator never sleeps, - * returning NULL if the request cannot be serviced immediately. + * returning NULL and suppressing warning if the request cannot be serviced + * immediately. * The caller can arrange to sleep in a way that is safe for rpciod. * * Most requests are 'small' (under 2KiB) and can be serviced from a @@ -845,7 +846,7 @@ static void rpc_async_schedule(struct work_struct *work) void *rpc_malloc(struct rpc_task *task, size_t size) { struct rpc_buffer *buf; - gfp_t gfp = GFP_NOWAIT; + gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) gfp |= __GFP_MEMALLOC; diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 0a648c502fc3..2df87f78e518 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -173,7 +173,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if (csum_fold(desc.csum)) return -1; - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 14c9f6d1c5ff..f2b7cb540e61 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } +static inline int sock_is_loopback(struct sock *sk) +{ + struct dst_entry *dst; + int loopback = 0; + rcu_read_lock(); + dst = rcu_dereference(sk->sk_dst_cache); + if (dst && dst->dev && + (dst->dev->features & NETIF_F_LOOPBACK)) + loopback = 1; + rcu_read_unlock(); + return loopback; +} + int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page *headpage, unsigned long headoffset, struct page *tailpage, unsigned long tailoffset); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 06c6ff0cb911..b4737fbdec13 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) } rqstp->rq_pages[i] = p; } + rqstp->rq_page_end = &rqstp->rq_pages[i]; rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ /* Make arg->head point to first page and arg->pages point to rest */ @@ -730,6 +731,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) svc_add_new_temp_xprt(serv, newxpt); + else + module_put(xprt->xpt_class->xcl_owner); } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", @@ -793,7 +796,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) clear_bit(XPT_OLD, &xprt->xpt_flags); - rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); + rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp); rqstp->rq_chandle.defer = svc_defer; if (serv->sv_stats) diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 2af7b0cba43a..79c0f3459b5c 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) } spin_unlock(&authtab_lock); + rqstp->rq_auth_slack = 0; + rqstp->rq_authop = aops; return aops->accept(rqstp, authp); } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 43bcb4699d69..b507cd327d9b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, release_sock(sock->sk); #endif } + +static int svc_sock_secure_port(struct svc_rqst *rqstp) +{ + return svc_port_is_privileged(svc_addr(rqstp)); +} + /* * INET callback when data has been received on the socket. */ @@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, .xpo_has_wspace = svc_udp_has_wspace, .xpo_accept = svc_udp_accept, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_udp_class = { @@ -842,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) * tell us anything. For now just warn about unpriv connections. */ if (!svc_port_is_privileged(sin)) { - dprintk(KERN_WARNING - "%s: connect from unprivileged port: %s\n", + dprintk("%s: connect from unprivileged port: %s\n", serv->sv_name, __svc_print_addr(sin, buf, sizeof(buf))); } @@ -867,6 +873,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) } svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); + if (sock_is_loopback(newsock->sk)) + set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); + else + clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); if (serv->sv_stats) serv->sv_stats->nettcpconn++; @@ -1112,6 +1122,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; + rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; @@ -1234,6 +1245,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = { .xpo_detach = svc_bc_tcp_sock_detach, .xpo_free = svc_bc_sock_free, .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_tcp_bc_class = { @@ -1272,6 +1284,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_tcp_class = { diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index dd97ba3c4456..23fb4e75e245 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; + xdr_set_scratch_buffer(xdr, NULL, 0); BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; @@ -482,6 +483,73 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) EXPORT_SYMBOL_GPL(xdr_init_encode); /** + * xdr_commit_encode - Ensure all data is written to buffer + * @xdr: pointer to xdr_stream + * + * We handle encoding across page boundaries by giving the caller a + * temporary location to write to, then later copying the data into + * place; xdr_commit_encode does that copying. + * + * Normally the caller doesn't need to call this directly, as the + * following xdr_reserve_space will do it. But an explicit call may be + * required at the end of encoding, or any other time when the xdr_buf + * data might be read. + */ +void xdr_commit_encode(struct xdr_stream *xdr) +{ + int shift = xdr->scratch.iov_len; + void *page; + + if (shift == 0) + return; + page = page_address(*xdr->page_ptr); + memcpy(xdr->scratch.iov_base, page, shift); + memmove(page, page + shift, (void *)xdr->p - page); + xdr->scratch.iov_len = 0; +} +EXPORT_SYMBOL_GPL(xdr_commit_encode); + +__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) +{ + static __be32 *p; + int space_left; + int frag1bytes, frag2bytes; + + if (nbytes > PAGE_SIZE) + return NULL; /* Bigger buffers require special handling */ + if (xdr->buf->len + nbytes > xdr->buf->buflen) + return NULL; /* Sorry, we're totally out of space */ + frag1bytes = (xdr->end - xdr->p) << 2; + frag2bytes = nbytes - frag1bytes; + if (xdr->iov) + xdr->iov->iov_len += frag1bytes; + else + xdr->buf->page_len += frag1bytes; + xdr->page_ptr++; + xdr->iov = NULL; + /* + * If the last encode didn't end exactly on a page boundary, the + * next one will straddle boundaries. Encode into the next + * page, then copy it back later in xdr_commit_encode. We use + * the "scratch" iov to track any temporarily unused fragment of + * space at the end of the previous buffer: + */ + xdr->scratch.iov_base = xdr->p; + xdr->scratch.iov_len = frag1bytes; + p = page_address(*xdr->page_ptr); + /* + * Note this is where the next encode will start after we've + * shifted this one back: + */ + xdr->p = (void *)p + frag2bytes; + space_left = xdr->buf->buflen - xdr->buf->len; + xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE); + xdr->buf->page_len += frag2bytes; + xdr->buf->len += nbytes; + return p; +} + +/** * xdr_reserve_space - Reserve buffer space for sending * @xdr: pointer to xdr_stream * @nbytes: number of bytes to reserve @@ -495,20 +563,122 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) __be32 *p = xdr->p; __be32 *q; + xdr_commit_encode(xdr); /* align nbytes on the next 32-bit boundary */ nbytes += 3; nbytes &= ~3; q = p + (nbytes >> 2); if (unlikely(q > xdr->end || q < p)) - return NULL; + return xdr_get_next_encode_buffer(xdr, nbytes); xdr->p = q; - xdr->iov->iov_len += nbytes; + if (xdr->iov) + xdr->iov->iov_len += nbytes; + else + xdr->buf->page_len += nbytes; xdr->buf->len += nbytes; return p; } EXPORT_SYMBOL_GPL(xdr_reserve_space); /** + * xdr_truncate_encode - truncate an encode buffer + * @xdr: pointer to xdr_stream + * @len: new length of buffer + * + * Truncates the xdr stream, so that xdr->buf->len == len, + * and xdr->p points at offset len from the start of the buffer, and + * head, tail, and page lengths are adjusted to correspond. + * + * If this means moving xdr->p to a different buffer, we assume that + * that the end pointer should be set to the end of the current page, + * except in the case of the head buffer when we assume the head + * buffer's current length represents the end of the available buffer. + * + * This is *not* safe to use on a buffer that already has inlined page + * cache pages (as in a zero-copy server read reply), except for the + * simple case of truncating from one position in the tail to another. + * + */ +void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) +{ + struct xdr_buf *buf = xdr->buf; + struct kvec *head = buf->head; + struct kvec *tail = buf->tail; + int fraglen; + int new, old; + + if (len > buf->len) { + WARN_ON_ONCE(1); + return; + } + xdr_commit_encode(xdr); + + fraglen = min_t(int, buf->len - len, tail->iov_len); + tail->iov_len -= fraglen; + buf->len -= fraglen; + if (tail->iov_len && buf->len == len) { + xdr->p = tail->iov_base + tail->iov_len; + /* xdr->end, xdr->iov should be set already */ + return; + } + WARN_ON_ONCE(fraglen); + fraglen = min_t(int, buf->len - len, buf->page_len); + buf->page_len -= fraglen; + buf->len -= fraglen; + + new = buf->page_base + buf->page_len; + old = new + fraglen; + xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT); + + if (buf->page_len && buf->len == len) { + xdr->p = page_address(*xdr->page_ptr); + xdr->end = (void *)xdr->p + PAGE_SIZE; + xdr->p = (void *)xdr->p + (new % PAGE_SIZE); + /* xdr->iov should already be NULL */ + return; + } + if (fraglen) { + xdr->end = head->iov_base + head->iov_len; + xdr->page_ptr--; + } + /* (otherwise assume xdr->end is already set) */ + head->iov_len = len; + buf->len = len; + xdr->p = head->iov_base + head->iov_len; + xdr->iov = buf->head; +} +EXPORT_SYMBOL(xdr_truncate_encode); + +/** + * xdr_restrict_buflen - decrease available buffer space + * @xdr: pointer to xdr_stream + * @newbuflen: new maximum number of bytes available + * + * Adjust our idea of how much space is available in the buffer. + * If we've already used too much space in the buffer, returns -1. + * If the available space is already smaller than newbuflen, returns 0 + * and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen + * and ensures xdr->end is set at most offset newbuflen from the start + * of the buffer. + */ +int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen) +{ + struct xdr_buf *buf = xdr->buf; + int left_in_this_buf = (void *)xdr->end - (void *)xdr->p; + int end_offset = buf->len + left_in_this_buf; + + if (newbuflen < 0 || newbuflen < buf->len) + return -1; + if (newbuflen > buf->buflen) + return 0; + if (newbuflen < end_offset) + xdr->end = (void *)xdr->end + newbuflen - end_offset; + buf->buflen = newbuflen; + return 0; +} +EXPORT_SYMBOL(xdr_restrict_buflen); + +/** * xdr_write_pages - Insert a list of pages into an XDR buffer for sending * @xdr: pointer to xdr_stream * @pages: list of pages diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d173f79947c6..c3b2b3369e52 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -71,24 +71,6 @@ static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); -/* - * The transport code maintains an estimate on the maximum number of out- - * standing RPC requests, using a smoothed version of the congestion - * avoidance implemented in 44BSD. This is basically the Van Jacobson - * congestion algorithm: If a retransmit occurs, the congestion window is - * halved; otherwise, it is incremented by 1/cwnd when - * - * - a reply is received and - * - a full number of requests are outstanding and - * - the congestion window hasn't been updated recently. - */ -#define RPC_CWNDSHIFT (8U) -#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) -#define RPC_INITCWND RPC_CWNDSCALE -#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) - -#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) - /** * xprt_register_transport - register a transport implementation * @transport: transport to register @@ -230,9 +212,9 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) { xprt->snd_task = NULL; if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); } else queue_work(rpciod_workqueue, &xprt->task_cleanup); } @@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); * @task: recently completed RPC request used to adjust window * @result: result code of completed RPC request * - * We use a time-smoothed congestion estimator to avoid heavy oscillation. + * The transport code maintains an estimate on the maximum number of out- + * standing RPC requests, using a smoothed version of the congestion + * avoidance implemented in 44BSD. This is basically the Van Jacobson + * congestion algorithm: If a retransmit occurs, the congestion window is + * halved; otherwise, it is incremented by 1/cwnd when + * + * - a reply is received and + * - a full number of requests are outstanding and + * - the congestion window hasn't been updated recently. */ void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) { diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 96ead526b125..693966d3f33b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -78,8 +78,7 @@ static const char transfertypes[][12] = { * elements. Segments are then coalesced when registered, if possible * within the selected memreg mode. * - * Note, this routine is never called if the connection's memory - * registration strategy is 0 (bounce buffers). + * Returns positive number of segments converted, or a negative errno. */ static int @@ -102,10 +101,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, page_base = xdrbuf->page_base & ~PAGE_MASK; p = 0; while (len && n < nsegs) { + if (!ppages[p]) { + /* alloc the pagelist for receiving buffer */ + ppages[p] = alloc_page(GFP_ATOMIC); + if (!ppages[p]) + return -ENOMEM; + } seg[n].mr_page = ppages[p]; seg[n].mr_offset = (void *)(unsigned long) page_base; seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); - BUG_ON(seg[n].mr_len > PAGE_SIZE); + if (seg[n].mr_len > PAGE_SIZE) + return -EIO; len -= seg[n].mr_len; ++n; ++p; @@ -114,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, /* Message overflows the seg array */ if (len && n == nsegs) - return 0; + return -EIO; if (xdrbuf->tail[0].iov_len) { /* the rpcrdma protocol allows us to omit any trailing @@ -123,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, return n; if (n == nsegs) /* Tail remains, but we're out of segments */ - return 0; + return -EIO; seg[n].mr_page = NULL; seg[n].mr_offset = xdrbuf->tail[0].iov_base; seg[n].mr_len = xdrbuf->tail[0].iov_len; @@ -164,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, * Reply chunk (a counted array): * N elements: * 1 - N - HLOO - HLOO - ... - HLOO + * + * Returns positive RPC/RDMA header size, or negative errno. */ -static unsigned int +static ssize_t rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) { struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); - int nsegs, nchunks = 0; + int n, nsegs, nchunks = 0; unsigned int pos; struct rpcrdma_mr_seg *seg = req->rl_segments; struct rpcrdma_read_chunk *cur_rchunk = NULL; @@ -198,12 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, pos = target->head[0].iov_len; nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); - if (nsegs == 0) - return 0; + if (nsegs < 0) + return nsegs; do { - /* bind/register the memory, then build chunk from result. */ - int n = rpcrdma_register_external(seg, nsegs, + n = rpcrdma_register_external(seg, nsegs, cur_wchunk != NULL, r_xprt); if (n <= 0) goto out; @@ -248,10 +255,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, /* success. all failures return above */ req->rl_nchunks = nchunks; - BUG_ON(nchunks == 0); - BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) - && (nchunks > 3)); - /* * finish off header. If write, marshal discrim and nchunks. */ @@ -278,8 +281,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, out: for (pos = 0; nchunks--;) pos += rpcrdma_deregister_external( - &req->rl_segments[pos], r_xprt, NULL); - return 0; + &req->rl_segments[pos], r_xprt); + return n; } /* @@ -361,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. * [2] -- optional padding. * [3] -- if padded, header only in [1] and data here. + * + * Returns zero on success, otherwise a negative errno. */ int @@ -370,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); char *base; - size_t hdrlen, rpclen, padlen; + size_t rpclen, padlen; + ssize_t hdrlen; enum rpcrdma_chunktype rtype, wtype; struct rpcrdma_msg *headerp; @@ -441,14 +447,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) /* The following simplification is not true forever */ if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) wtype = rpcrdma_noch; - BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); - - if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && - (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { - /* forced to "pure inline"? */ - dprintk("RPC: %s: too much data (%d/%d) for inline\n", - __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); - return -1; + if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { + dprintk("RPC: %s: cannot marshal multiple chunk lists\n", + __func__); + return -EIO; } hdrlen = 28; /*sizeof *headerp;*/ @@ -474,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ - BUG_ON(wtype != rpcrdma_noch); - + if (wtype != rpcrdma_noch) { + dprintk("RPC: %s: invalid chunk list\n", + __func__); + return -EIO; + } } else { headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; @@ -492,8 +497,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * on receive. Therefore, we request a reply chunk * for non-writes wherever feasible and efficient. */ - if (wtype == rpcrdma_noch && - r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) + if (wtype == rpcrdma_noch) wtype = rpcrdma_replych; } } @@ -511,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, headerp, wtype); } - - if (hdrlen == 0) - return -1; + if (hdrlen < 0) + return hdrlen; dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" " headerp 0x%p base 0x%p lkey 0x%x\n", @@ -680,15 +683,11 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) rqst->rq_private_buf = rqst->rq_rcv_buf; } -/* - * This function is called when an async event is posted to - * the connection which changes the connection state. All it - * does at this point is mark the connection up/down, the rpc - * timers do the rest. - */ void -rpcrdma_conn_func(struct rpcrdma_ep *ep) +rpcrdma_connect_worker(struct work_struct *work) { + struct rpcrdma_ep *ep = + container_of(work, struct rpcrdma_ep, rep_connect_worker.work); struct rpc_xprt *xprt = ep->rep_xprt; spin_lock_bh(&xprt->transport_lock); @@ -705,13 +704,15 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) } /* - * This function is called when memory window unbind which we are waiting - * for completes. Just use rr_func (zeroed by upcall) to signal completion. + * This function is called when an async event is posted to + * the connection which changes the connection state. All it + * does at this point is mark the connection up/down, the rpc + * timers do the rest. */ -static void -rpcrdma_unbind_func(struct rpcrdma_rep *rep) +void +rpcrdma_conn_func(struct rpcrdma_ep *ep) { - wake_up(&rep->rr_unbind); + schedule_delayed_work(&ep->rep_connect_worker, 0); } /* @@ -728,7 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpc_xprt *xprt = rep->rr_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); __be32 *iptr; - int i, rdmalen, status; + int rdmalen, status; + unsigned long cwnd; /* Check status. If bad, signal disconnect and return rep to pool */ if (rep->rr_len == ~0U) { @@ -783,6 +785,7 @@ repost: /* from here on, the reply is no longer an orphan */ req->rl_reply = rep; + xprt->reestablish_timeout = 0; /* check for expected message types */ /* The order of some of these tests is important. */ @@ -857,26 +860,10 @@ badheader: break; } - /* If using mw bind, start the deregister process now. */ - /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ - if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { - case RPCRDMA_MEMWINDOWS: - for (i = 0; req->rl_nchunks-- > 1;) - i += rpcrdma_deregister_external( - &req->rl_segments[i], r_xprt, NULL); - /* Optionally wait (not here) for unbinds to complete */ - rep->rr_func = rpcrdma_unbind_func; - (void) rpcrdma_deregister_external(&req->rl_segments[i], - r_xprt, rep); - break; - case RPCRDMA_MEMWINDOWS_ASYNC: - for (i = 0; req->rl_nchunks--;) - i += rpcrdma_deregister_external(&req->rl_segments[i], - r_xprt, NULL); - break; - default: - break; - } + cwnd = xprt->cwnd; + xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; + if (xprt->cwnd > cwnd) + xprt_release_rqst_cong(rqst->rq_task); dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", __func__, xprt, rqst, status); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 8d904e4eef15..8f92a61ee2df 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* Set up the XDR head */ rqstp->rq_arg.head[0].iov_base = page_address(page); - rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length); + rqstp->rq_arg.head[0].iov_len = + min_t(size_t, byte_count, ctxt->sge[0].length); rqstp->rq_arg.len = byte_count; rqstp->rq_arg.buflen = byte_count; @@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, page = ctxt->pages[sge_no]; put_page(rqstp->rq_pages[sge_no]); rqstp->rq_pages[sge_no] = page; - bc -= min(bc, ctxt->sge[sge_no].length); + bc -= min_t(u32, bc, ctxt->sge[sge_no].length); rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; sge_no++; } @@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_arg.tail[0].iov_len = 0; } -/* Encode a read-chunk-list as an array of IB SGE - * - * Assumptions: - * - chunk[0]->position points to pages[0] at an offset of 0 - * - pages[] is not physically or virtually contiguous and consists of - * PAGE_SIZE elements. - * - * Output: - * - sge array pointing into pages[] array. - * - chunk_sge array specifying sge index and count for each - * chunk in the read list - * - */ -static int map_read_chunks(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - struct rpcrdma_msg *rmsgp, - struct svc_rdma_req_map *rpl_map, - struct svc_rdma_req_map *chl_map, - int ch_count, - int byte_count) +static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) { - int sge_no; - int sge_bytes; - int page_off; - int page_no; - int ch_bytes; - int ch_no; - struct rpcrdma_read_chunk *ch; + if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == + RDMA_TRANSPORT_IWARP) + return 1; + else + return min_t(int, sge_count, xprt->sc_max_sge); +} - sge_no = 0; - page_no = 0; - page_off = 0; - ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch_no = 0; - ch_bytes = ntohl(ch->rc_target.rs_length); - head->arg.head[0] = rqstp->rq_arg.head[0]; - head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; - head->hdr_count = head->count; /* save count of hdr pages */ - head->arg.page_base = 0; - head->arg.page_len = ch_bytes; - head->arg.len = rqstp->rq_arg.len + ch_bytes; - head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; - head->count++; - chl_map->ch[0].start = 0; - while (byte_count) { - rpl_map->sge[sge_no].iov_base = - page_address(rqstp->rq_arg.pages[page_no]) + page_off; - sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); - rpl_map->sge[sge_no].iov_len = sge_bytes; - /* - * Don't bump head->count here because the same page - * may be used by multiple SGE. - */ - head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; - rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; +typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last); + +/* Issue an RDMA_READ using the local lkey to map the data sink */ +static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last) +{ + struct ib_send_wr read_wr; + int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); + int ret, read, pno; + u32 pg_off = *page_offset; + u32 pg_no = *page_no; + + ctxt->direction = DMA_FROM_DEVICE; + ctxt->read_hdr = head; + pages_needed = + min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); + read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + + for (pno = 0; pno < pages_needed; pno++) { + int len = min_t(int, rs_length, PAGE_SIZE - pg_off); + + head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; + head->arg.page_len += len; + head->arg.len += len; + if (!pg_off) + head->count++; + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; + ctxt->sge[pno].addr = + ib_dma_map_page(xprt->sc_cm_id->device, + head->arg.pages[pg_no], pg_off, + PAGE_SIZE - pg_off, + DMA_FROM_DEVICE); + ret = ib_dma_mapping_error(xprt->sc_cm_id->device, + ctxt->sge[pno].addr); + if (ret) + goto err; + atomic_inc(&xprt->sc_dma_used); - byte_count -= sge_bytes; - ch_bytes -= sge_bytes; - sge_no++; - /* - * If all bytes for this chunk have been mapped to an - * SGE, move to the next SGE - */ - if (ch_bytes == 0) { - chl_map->ch[ch_no].count = - sge_no - chl_map->ch[ch_no].start; - ch_no++; - ch++; - chl_map->ch[ch_no].start = sge_no; - ch_bytes = ntohl(ch->rc_target.rs_length); - /* If bytes remaining account for next chunk */ - if (byte_count) { - head->arg.page_len += ch_bytes; - head->arg.len += ch_bytes; - head->arg.buflen += ch_bytes; - } + /* The lkey here is either a local dma lkey or a dma_mr lkey */ + ctxt->sge[pno].lkey = xprt->sc_dma_lkey; + ctxt->sge[pno].length = len; + ctxt->count++; + + /* adjust offset and wrap to next page if needed */ + pg_off += len; + if (pg_off == PAGE_SIZE) { + pg_off = 0; + pg_no++; } - /* - * If this SGE consumed all of the page, move to the - * next page - */ - if ((sge_bytes + page_off) == PAGE_SIZE) { - page_no++; - page_off = 0; - /* - * If there are still bytes left to map, bump - * the page count - */ - if (byte_count) - head->count++; - } else - page_off += sge_bytes; + rs_length -= len; } - BUG_ON(byte_count != 0); - return sge_no; + + if (last && rs_length == 0) + set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + else + clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + + memset(&read_wr, 0, sizeof(read_wr)); + read_wr.wr_id = (unsigned long)ctxt; + read_wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.opcode; + read_wr.send_flags = IB_SEND_SIGNALED; + read_wr.wr.rdma.rkey = rs_handle; + read_wr.wr.rdma.remote_addr = rs_offset; + read_wr.sg_list = ctxt->sge; + read_wr.num_sge = pages_needed; + + ret = svc_rdma_send(xprt, &read_wr); + if (ret) { + pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + goto err; + } + + /* return current location in page array */ + *page_no = pg_no; + *page_offset = pg_off; + ret = read; + atomic_inc(&rdma_stat_read); + return ret; + err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + return ret; } -/* Map a read-chunk-list to an XDR and fast register the page-list. - * - * Assumptions: - * - chunk[0] position points to pages[0] at an offset of 0 - * - pages[] will be made physically contiguous by creating a one-off memory - * region using the fastreg verb. - * - byte_count is # of bytes in read-chunk-list - * - ch_count is # of chunks in read-chunk-list - * - * Output: - * - sge array pointing into pages[] array. - * - chunk_sge array specifying sge index and count for each - * chunk in the read list - */ -static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, +/* Issue an RDMA_READ using an FRMR to map the data sink */ +static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, - struct rpcrdma_msg *rmsgp, - struct svc_rdma_req_map *rpl_map, - struct svc_rdma_req_map *chl_map, - int ch_count, - int byte_count) + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last) { - int page_no; - int ch_no; - u32 offset; - struct rpcrdma_read_chunk *ch; - struct svc_rdma_fastreg_mr *frmr; - int ret = 0; + struct ib_send_wr read_wr; + struct ib_send_wr inv_wr; + struct ib_send_wr fastreg_wr; + u8 key; + int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); + struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); + int ret, read, pno; + u32 pg_off = *page_offset; + u32 pg_no = *page_no; - frmr = svc_rdma_get_frmr(xprt); if (IS_ERR(frmr)) return -ENOMEM; - head->frmr = frmr; - head->arg.head[0] = rqstp->rq_arg.head[0]; - head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; - head->hdr_count = head->count; /* save count of hdr pages */ - head->arg.page_base = 0; - head->arg.page_len = byte_count; - head->arg.len = rqstp->rq_arg.len + byte_count; - head->arg.buflen = rqstp->rq_arg.buflen + byte_count; + ctxt->direction = DMA_FROM_DEVICE; + ctxt->frmr = frmr; + pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); + read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); - /* Fast register the page list */ - frmr->kva = page_address(rqstp->rq_arg.pages[0]); + frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); - frmr->map_len = byte_count; - frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; - for (page_no = 0; page_no < frmr->page_list_len; page_no++) { - frmr->page_list->page_list[page_no] = + frmr->map_len = pages_needed << PAGE_SHIFT; + frmr->page_list_len = pages_needed; + + for (pno = 0; pno < pages_needed; pno++) { + int len = min_t(int, rs_length, PAGE_SIZE - pg_off); + + head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; + head->arg.page_len += len; + head->arg.len += len; + if (!pg_off) + head->count++; + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + frmr->page_list->page_list[pno] = ib_dma_map_page(xprt->sc_cm_id->device, - rqstp->rq_arg.pages[page_no], 0, + head->arg.pages[pg_no], 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; + ret = ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[pno]); + if (ret) + goto err; atomic_inc(&xprt->sc_dma_used); - head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; - } - head->count += page_no; - - /* rq_respages points one past arg pages */ - rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - /* Create the reply and chunk maps */ - offset = 0; - ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - for (ch_no = 0; ch_no < ch_count; ch_no++) { - int len = ntohl(ch->rc_target.rs_length); - rpl_map->sge[ch_no].iov_base = frmr->kva + offset; - rpl_map->sge[ch_no].iov_len = len; - chl_map->ch[ch_no].count = 1; - chl_map->ch[ch_no].start = ch_no; - offset += len; - ch++; + /* adjust offset and wrap to next page if needed */ + pg_off += len; + if (pg_off == PAGE_SIZE) { + pg_off = 0; + pg_no++; + } + rs_length -= len; } - ret = svc_rdma_fastreg(xprt, frmr); - if (ret) - goto fatal_err; - - return ch_no; - - fatal_err: - printk("svcrdma: error fast registering xdr for xprt %p", xprt); - svc_rdma_put_frmr(xprt, frmr); - return -EIO; -} - -static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, - struct svc_rdma_op_ctxt *ctxt, - struct svc_rdma_fastreg_mr *frmr, - struct kvec *vec, - u64 *sgl_offset, - int count) -{ - int i; - unsigned long off; + if (last && rs_length == 0) + set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + else + clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - ctxt->count = count; - ctxt->direction = DMA_FROM_DEVICE; - for (i = 0; i < count; i++) { - ctxt->sge[i].length = 0; /* in case map fails */ - if (!frmr) { - BUG_ON(!virt_to_page(vec[i].iov_base)); - off = (unsigned long)vec[i].iov_base & ~PAGE_MASK; - ctxt->sge[i].addr = - ib_dma_map_page(xprt->sc_cm_id->device, - virt_to_page(vec[i].iov_base), - off, - vec[i].iov_len, - DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - ctxt->sge[i].addr)) - return -EINVAL; - ctxt->sge[i].lkey = xprt->sc_dma_lkey; - atomic_inc(&xprt->sc_dma_used); - } else { - ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; - ctxt->sge[i].lkey = frmr->mr->lkey; - } - ctxt->sge[i].length = vec[i].iov_len; - *sgl_offset = *sgl_offset + vec[i].iov_len; + /* Bump the key */ + key = (u8)(frmr->mr->lkey & 0x000000FF); + ib_update_fast_reg_key(frmr->mr, ++key); + + ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; + ctxt->sge[0].lkey = frmr->mr->lkey; + ctxt->sge[0].length = read; + ctxt->count = 1; + ctxt->read_hdr = head; + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; + fastreg_wr.wr.fast_reg.page_list = frmr->page_list; + fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = frmr->map_len; + fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; + fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; + fastreg_wr.next = &read_wr; + + /* Prepare RDMA_READ */ + memset(&read_wr, 0, sizeof(read_wr)); + read_wr.send_flags = IB_SEND_SIGNALED; + read_wr.wr.rdma.rkey = rs_handle; + read_wr.wr.rdma.remote_addr = rs_offset; + read_wr.sg_list = ctxt->sge; + read_wr.num_sge = 1; + if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { + read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; + read_wr.wr_id = (unsigned long)ctxt; + read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; + } else { + read_wr.opcode = IB_WR_RDMA_READ; + read_wr.next = &inv_wr; + /* Prepare invalidate */ + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = (unsigned long)ctxt; + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; + inv_wr.ex.invalidate_rkey = frmr->mr->lkey; + } + ctxt->wr_op = read_wr.opcode; + + /* Post the chain */ + ret = svc_rdma_send(xprt, &fastreg_wr); + if (ret) { + pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + goto err; } - return 0; -} -static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) -{ - if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == - RDMA_TRANSPORT_IWARP) && - sge_count > 1) - return 1; - else - return min_t(int, sge_count, xprt->sc_max_sge); + /* return current location in page array */ + *page_no = pg_no; + *page_offset = pg_off; + ret = read; + atomic_inc(&rdma_stat_read); + return ret; + err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + svc_rdma_put_frmr(xprt, frmr); + return ret; } -/* - * Use RDMA_READ to read data from the advertised client buffer into the - * XDR stream starting at rq_arg.head[0].iov_base. - * Each chunk in the array - * contains the following fields: - * discrim - '1', This isn't used for data placement - * position - The xdr stream offset (the same for every chunk) - * handle - RMR for client memory region - * length - data transfer length - * offset - 64 bit tagged offset in remote memory region - * - * On our side, we need to read into a pagelist. The first page immediately - * follows the RPC header. - * - * This function returns: - * 0 - No error and no read-list found. - * - * 1 - Successful read-list processing. The data is not yet in - * the pagelist and therefore the RPC request must be deferred. The - * I/O completion will enqueue the transport again and - * svc_rdma_recvfrom will complete the request. - * - * <0 - Error processing/posting read-list. - * - * NOTE: The ctxt must not be touched after the last WR has been posted - * because the I/O completion processing may occur on another - * processor and free / modify the context. Ne touche pas! - */ -static int rdma_read_xdr(struct svcxprt_rdma *xprt, - struct rpcrdma_msg *rmsgp, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *hdr_ctxt) +static int rdma_read_chunks(struct svcxprt_rdma *xprt, + struct rpcrdma_msg *rmsgp, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head) { - struct ib_send_wr read_wr; - struct ib_send_wr inv_wr; - int err = 0; - int ch_no; - int ch_count; - int byte_count; - int sge_count; - u64 sgl_offset; + int page_no, ch_count, ret; struct rpcrdma_read_chunk *ch; - struct svc_rdma_op_ctxt *ctxt = NULL; - struct svc_rdma_req_map *rpl_map; - struct svc_rdma_req_map *chl_map; + u32 page_offset, byte_count; + u64 rs_offset; + rdma_reader_fn reader; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); @@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; - /* Allocate temporary reply and chunk maps */ - rpl_map = svc_rdma_get_req_map(); - chl_map = svc_rdma_get_req_map(); + /* The request is completed when the RDMA_READs complete. The + * head context keeps all the pages that comprise the + * request. + */ + head->arg.head[0] = rqstp->rq_arg.head[0]; + head->arg.tail[0] = rqstp->rq_arg.tail[0]; + head->arg.pages = &head->pages[head->count]; + head->hdr_count = head->count; + head->arg.page_base = 0; + head->arg.page_len = 0; + head->arg.len = rqstp->rq_arg.len; + head->arg.buflen = rqstp->rq_arg.buflen; - if (!xprt->sc_frmr_pg_list_len) - sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, - rpl_map, chl_map, ch_count, - byte_count); + /* Use FRMR if supported */ + if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) + reader = rdma_read_chunk_frmr; else - sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, - rpl_map, chl_map, ch_count, - byte_count); - if (sge_count < 0) { - err = -EIO; - goto out; - } - - sgl_offset = 0; - ch_no = 0; + reader = rdma_read_chunk_lcl; + page_no = 0; page_offset = 0; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch->rc_discrim != 0; ch++, ch_no++) { - u64 rs_offset; -next_sge: - ctxt = svc_rdma_get_context(xprt); - ctxt->direction = DMA_FROM_DEVICE; - ctxt->frmr = hdr_ctxt->frmr; - ctxt->read_hdr = NULL; - clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + ch->rc_discrim != 0; ch++) { - /* Prepare READ WR */ - memset(&read_wr, 0, sizeof read_wr); - read_wr.wr_id = (unsigned long)ctxt; - read_wr.opcode = IB_WR_RDMA_READ; - ctxt->wr_op = read_wr.opcode; - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); - read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = - rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); - err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, - &rpl_map->sge[chl_map->ch[ch_no].start], - &sgl_offset, - read_wr.num_sge); - if (err) { - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 0); - goto out; - } - if (((ch+1)->rc_discrim == 0) && - (read_wr.num_sge == chl_map->ch[ch_no].count)) { - /* - * Mark the last RDMA_READ with a bit to - * indicate all RPC data has been fetched from - * the client and the RPC needs to be enqueued. - */ - set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - if (hdr_ctxt->frmr) { - set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); - /* - * Invalidate the local MR used to map the data - * sink. - */ - if (xprt->sc_dev_caps & - SVCRDMA_DEVCAP_READ_W_INV) { - read_wr.opcode = - IB_WR_RDMA_READ_WITH_INV; - ctxt->wr_op = read_wr.opcode; - read_wr.ex.invalidate_rkey = - ctxt->frmr->mr->lkey; - } else { - /* Prepare INVALIDATE WR */ - memset(&inv_wr, 0, sizeof inv_wr); - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; - inv_wr.ex.invalidate_rkey = - hdr_ctxt->frmr->mr->lkey; - read_wr.next = &inv_wr; - } - } - ctxt->read_hdr = hdr_ctxt; - } - /* Post the read */ - err = svc_rdma_send(xprt, &read_wr); - if (err) { - printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", - err); - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 0); - goto out; + byte_count = ntohl(ch->rc_target.rs_length); + + while (byte_count > 0) { + ret = reader(xprt, rqstp, head, + &page_no, &page_offset, + ntohl(ch->rc_target.rs_handle), + byte_count, rs_offset, + ((ch+1)->rc_discrim == 0) /* last */ + ); + if (ret < 0) + goto err; + byte_count -= ret; + rs_offset += ret; + head->arg.buflen += ret; } - atomic_inc(&rdma_stat_read); - - if (read_wr.num_sge < chl_map->ch[ch_no].count) { - chl_map->ch[ch_no].count -= read_wr.num_sge; - chl_map->ch[ch_no].start += read_wr.num_sge; - goto next_sge; - } - sgl_offset = 0; - err = 1; } - - out: - svc_rdma_put_req_map(rpl_map); - svc_rdma_put_req_map(chl_map); - + ret = 1; + err: /* Detach arg pages. svc_recv will replenish them */ - for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) - rqstp->rq_pages[ch_no] = NULL; + for (page_no = 0; + &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) + rqstp->rq_pages[page_no] = NULL; - return err; + return ret; } static int rdma_read_complete(struct svc_rqst *rqstp, @@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_op_ctxt, dto_q); list_del_init(&ctxt->dto_q); - } - if (ctxt) { spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); return rdma_read_complete(rqstp, ctxt); - } - - if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { + } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, struct svc_rdma_op_ctxt, dto_q); @@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) goto close_out; - BUG_ON(ret); goto out; } dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", @@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) } /* Read read-list data. */ - ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); + ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); if (ret > 0) { /* read-list posted, defer until data received from client. */ goto defer; - } - if (ret < 0) { + } else if (ret < 0) { /* Post of read-list failed, free context. */ svc_rdma_put_context(ctxt, 1); return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7e024a51617e..49fd21a5c215 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,152 +50,6 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -/* Encode an XDR as an array of IB SGE - * - * Assumptions: - * - head[0] is physically contiguous. - * - tail[0] is physically contiguous. - * - pages[] is not physically or virtually contiguous and consists of - * PAGE_SIZE elements. - * - * Output: - * SGE[0] reserved for RCPRDMA header - * SGE[1] data from xdr->head[] - * SGE[2..sge_count-2] data from xdr->pages[] - * SGE[sge_count-1] data from xdr->tail. - * - * The max SGE we need is the length of the XDR / pagesize + one for - * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES - * reserves a page for both the request and the reply header, and this - * array is only concerned with the reply we are assured that we have - * on extra page for the RPCRMDA header. - */ -static int fast_reg_xdr(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct svc_rdma_req_map *vec) -{ - int sge_no; - u32 sge_bytes; - u32 page_bytes; - u32 page_off; - int page_no = 0; - u8 *frva; - struct svc_rdma_fastreg_mr *frmr; - - frmr = svc_rdma_get_frmr(xprt); - if (IS_ERR(frmr)) - return -ENOMEM; - vec->frmr = frmr; - - /* Skip the RPCRDMA header */ - sge_no = 1; - - /* Map the head. */ - frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); - vec->sge[sge_no].iov_base = xdr->head[0].iov_base; - vec->sge[sge_no].iov_len = xdr->head[0].iov_len; - vec->count = 2; - sge_no++; - - /* Map the XDR head */ - frmr->kva = frva; - frmr->direction = DMA_TO_DEVICE; - frmr->access_flags = 0; - frmr->map_len = PAGE_SIZE; - frmr->page_list_len = 1; - page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, - virt_to_page(xdr->head[0].iov_base), - page_off, - PAGE_SIZE - page_off, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - atomic_inc(&xprt->sc_dma_used); - - /* Map the XDR page list */ - page_off = xdr->page_base; - page_bytes = xdr->page_len + page_off; - if (!page_bytes) - goto encode_tail; - - /* Map the pages */ - vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; - vec->sge[sge_no].iov_len = page_bytes; - sge_no++; - while (page_bytes) { - struct page *page; - - page = xdr->pages[page_no++]; - sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); - page_bytes -= sge_bytes; - - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, - page, page_off, - sge_bytes, DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - - atomic_inc(&xprt->sc_dma_used); - page_off = 0; /* reset for next time through loop */ - frmr->map_len += PAGE_SIZE; - frmr->page_list_len++; - } - vec->count++; - - encode_tail: - /* Map tail */ - if (0 == xdr->tail[0].iov_len) - goto done; - - vec->count++; - vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; - - if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == - ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { - /* - * If head and tail use the same page, we don't need - * to map it again. - */ - vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; - } else { - void *va; - - /* Map another page for the tail */ - page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; - va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); - vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; - - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va), - page_off, - PAGE_SIZE, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - atomic_inc(&xprt->sc_dma_used); - frmr->map_len += PAGE_SIZE; - frmr->page_list_len++; - } - - done: - if (svc_rdma_fastreg(xprt, frmr)) - goto fatal_err; - - return 0; - - fatal_err: - printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); - vec->frmr = NULL; - svc_rdma_put_frmr(xprt, frmr); - return -EIO; -} - static int map_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, struct svc_rdma_req_map *vec) @@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt, BUG_ON(xdr->len != (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); - if (xprt->sc_frmr_pg_list_len) - return fast_reg_xdr(xprt, xdr, vec); - /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; @@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, } /* Assumptions: - * - We are using FRMR - * - or - * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, @@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_bytes = min_t(size_t, bc, vec->sge[xdr_sge_no].iov_len-sge_off); sge[sge_no].length = sge_bytes; - if (!vec->frmr) { - sge[sge_no].addr = - dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, - sge_bytes, DMA_TO_DEVICE); - xdr_off += sge_bytes; - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - sge[sge_no].addr)) - goto err; - atomic_inc(&xprt->sc_dma_used); - sge[sge_no].lkey = xprt->sc_dma_lkey; - } else { - sge[sge_no].addr = (unsigned long) - vec->sge[xdr_sge_no].iov_base + sge_off; - sge[sge_no].lkey = vec->frmr->mr->lkey; - } + sge[sge_no].addr = + dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + sge[sge_no].addr)) + goto err; + atomic_inc(&xprt->sc_dma_used); + sge[sge_no].lkey = xprt->sc_dma_lkey; ctxt->count++; - ctxt->frmr = vec->frmr; sge_off = 0; sge_no++; xdr_sge_no++; @@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, return 0; err: svc_rdma_unmap_dma(ctxt); - svc_rdma_put_frmr(xprt, vec->frmr); svc_rdma_put_context(ctxt, 0); /* Fatal error, close transport */ return -EIO; @@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[1]; - if (vec->frmr) - max_write = vec->frmr->map_len; - else - max_write = xprt->sc_max_sge * PAGE_SIZE; + max_write = xprt->sc_max_sge * PAGE_SIZE; /* Write chunks start at the pagelist */ for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; @@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[2]; - if (vec->frmr) - max_write = vec->frmr->map_len; - else - max_write = xprt->sc_max_sge * PAGE_SIZE; + max_write = xprt->sc_max_sge * PAGE_SIZE; /* xdr offset starts at RPC message */ nchunks = ntohl(arg_ary->wc_nchunks); @@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma, int byte_count) { struct ib_send_wr send_wr; - struct ib_send_wr inv_wr; int sge_no; int sge_bytes; int page_no; @@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma, "svcrdma: could not post a receive buffer, err=%d." "Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } @@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma, /* Prepare the context */ ctxt->pages[0] = page; ctxt->count = 1; - ctxt->frmr = vec->frmr; - if (vec->frmr) - set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); - else - clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ ctxt->sge[0].lkey = rdma->sc_dma_lkey; @@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma, int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; - if (!vec->frmr) { - ctxt->sge[sge_no].addr = - dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, - sge_bytes, DMA_TO_DEVICE); - xdr_off += sge_bytes; - if (ib_dma_mapping_error(rdma->sc_cm_id->device, - ctxt->sge[sge_no].addr)) - goto err; - atomic_inc(&rdma->sc_dma_used); - ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; - } else { - ctxt->sge[sge_no].addr = (unsigned long) - vec->sge[sge_no].iov_base; - ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; - } + ctxt->sge[sge_no].addr = + dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; + if (ib_dma_mapping_error(rdma->sc_cm_id->device, + ctxt->sge[sge_no].addr)) + goto err; + atomic_inc(&rdma->sc_dma_used); + ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } BUG_ON(byte_count != 0); @@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[page_no+1].length = 0; } rqstp->rq_next_page = rqstp->rq_respages + 1; + BUG_ON(sge_no > rdma->sc_max_sge); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; @@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma, send_wr.num_sge = sge_no; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - if (vec->frmr) { - /* Prepare INVALIDATE WR */ - memset(&inv_wr, 0, sizeof inv_wr); - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; - inv_wr.ex.invalidate_rkey = - vec->frmr->mr->lkey; - send_wr.next = &inv_wr; - } ret = svc_rdma_send(rdma, &send_wr); if (ret) @@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma, err: svc_rdma_unmap_dma(ctxt); - svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 1); return -EIO; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 25688fa2207f..e7323fbbd348 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -65,6 +66,7 @@ static void dto_tasklet_func(unsigned long data); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); +static int svc_rdma_secure_port(struct svc_rqst *); static void rq_cq_reap(struct svcxprt_rdma *xprt); static void sq_cq_reap(struct svcxprt_rdma *xprt); @@ -82,6 +84,7 @@ static struct svc_xprt_ops svc_rdma_ops = { .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, .xpo_has_wspace = svc_rdma_has_wspace, .xpo_accept = svc_rdma_accept, + .xpo_secure_port = svc_rdma_secure_port, }; struct svc_xprt_class svc_rdma_class = { @@ -160,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); } map->count = 0; - map->frmr = NULL; return map; } @@ -336,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt, switch (ctxt->wr_op) { case IB_WR_SEND: - if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) - svc_rdma_put_frmr(xprt, ctxt->frmr); + BUG_ON(ctxt->frmr); svc_rdma_put_context(ctxt, 1); break; case IB_WR_RDMA_WRITE: + BUG_ON(ctxt->frmr); svc_rdma_put_context(ctxt, 0); break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: + svc_rdma_put_frmr(xprt, ctxt->frmr); if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; BUG_ON(!read_hdr); - if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) - svc_rdma_put_frmr(xprt, ctxt->frmr); spin_lock_bh(&xprt->sc_rq_dto_lock); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); list_add_tail(&read_hdr->dto_q, @@ -363,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt, break; default: + BUG_ON(1); printk(KERN_ERR "svcrdma: unexpected completion type, " "opcode=%d\n", ctxt->wr_op); @@ -378,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt, static void sq_cq_reap(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt = NULL; - struct ib_wc wc; + struct ib_wc wc_a[6]; + struct ib_wc *wc; struct ib_cq *cq = xprt->sc_sq_cq; int ret; + memset(wc_a, 0, sizeof(wc_a)); + if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); - while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - if (wc.status != IB_WC_SUCCESS) - /* Close the transport */ - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) { + int i; - /* Decrement used SQ WR count */ - atomic_dec(&xprt->sc_sq_count); - wake_up(&xprt->sc_send_wait); + for (i = 0; i < ret; i++) { + wc = &wc_a[i]; + if (wc->status != IB_WC_SUCCESS) { + dprintk("svcrdma: sq wc err status %d\n", + wc->status); - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - if (ctxt) - process_context(xprt, ctxt); + /* Close the transport */ + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + } - svc_xprt_put(&xprt->sc_xprt); + /* Decrement used SQ WR count */ + atomic_dec(&xprt->sc_sq_count); + wake_up(&xprt->sc_send_wait); + + ctxt = (struct svc_rdma_op_ctxt *) + (unsigned long)wc->wr_id; + if (ctxt) + process_context(xprt, ctxt); + + svc_xprt_put(&xprt->sc_xprt); + } } if (ctxt) @@ -993,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) need_dma_mr = 0; break; case RDMA_TRANSPORT_IB: - if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else if (!(devattr.device_cap_flags & + IB_DEVICE_LOCAL_DMA_LKEY)) { need_dma_mr = 1; dma_mr_acc = IB_ACCESS_LOCAL_WRITE; } else @@ -1190,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) container_of(xprt, struct svcxprt_rdma, sc_xprt); /* - * If there are fewer SQ WR available than required to send a - * simple response, return false. - */ - if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3)) - return 0; - - /* - * ...or there are already waiters on the SQ, + * If there are already waiters on the SQ, * return false. */ if (waitqueue_active(&rdma->sc_send_wait)) @@ -1207,6 +1219,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +static int svc_rdma_secure_port(struct svc_rqst *rqstp) +{ + return 1; +} + /* * Attempt to register the kvec representing the RPC memory with the * device. diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1eb9c468d0c9..66f91f0d071a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -149,6 +149,11 @@ static struct ctl_table sunrpc_table[] = { #endif +#define RPCRDMA_BIND_TO (60U * HZ) +#define RPCRDMA_INIT_REEST_TO (5U * HZ) +#define RPCRDMA_MAX_REEST_TO (30U * HZ) +#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) + static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ static void @@ -229,7 +234,6 @@ static void xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int rc; dprintk("RPC: %s: called\n", __func__); @@ -238,10 +242,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) xprt_clear_connected(xprt); rpcrdma_buffer_destroy(&r_xprt->rx_buf); - rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); - if (rc) - dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", - __func__, rc); + rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); rpcrdma_ia_close(&r_xprt->rx_ia); xprt_rdma_free_addresses(xprt); @@ -289,9 +290,9 @@ xprt_setup_rdma(struct xprt_create *args) /* 60 second timeout, no retries */ xprt->timeout = &xprt_rdma_default_timeout; - xprt->bind_timeout = (60U * HZ); - xprt->reestablish_timeout = (5U * HZ); - xprt->idle_timeout = (5U * 60 * HZ); + xprt->bind_timeout = RPCRDMA_BIND_TO; + xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; + xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; xprt->resvport = 0; /* privileged port not needed */ xprt->tsh_size = 0; /* RPC-RDMA handles framing */ @@ -391,7 +392,7 @@ out4: xprt_rdma_free_addresses(xprt); rc = -EINVAL; out3: - (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); + rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: @@ -436,10 +437,10 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) schedule_delayed_work(&r_xprt->rdma_connect, xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; - if (xprt->reestablish_timeout > (30 * HZ)) - xprt->reestablish_timeout = (30 * HZ); - else if (xprt->reestablish_timeout < (5 * HZ)) - xprt->reestablish_timeout = (5 * HZ); + if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) + xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; + else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) + xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; } else { schedule_delayed_work(&r_xprt->rdma_connect, 0); if (!RPC_IS_ASYNC(task)) @@ -447,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) } } -static int -xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) -{ - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int credits = atomic_read(&r_xprt->rx_buf.rb_credits); - - /* == RPC_CWNDSCALE @ init, but *after* setup */ - if (r_xprt->rx_buf.rb_cwndscale == 0UL) { - r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; - dprintk("RPC: %s: cwndscale %lu\n", __func__, - r_xprt->rx_buf.rb_cwndscale); - BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); - } - xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; - return xprt_reserve_xprt_cong(xprt, task); -} - /* * The RDMA allocate/free functions need the task structure as a place * to hide the struct rpcrdma_req, which is necessary for the actual send/recv @@ -479,7 +463,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) struct rpcrdma_req *req, *nreq; req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); - BUG_ON(NULL == req); + if (req == NULL) + return NULL; if (size > req->rl_size) { dprintk("RPC: %s: size %zd too large for buffer[%zd]: " @@ -503,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) * If the allocation or registration fails, the RPC framework * will (doggedly) retry. */ - if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == - RPCRDMA_BOUNCEBUFFERS) { - /* forced to "pure inline" */ - dprintk("RPC: %s: too much data (%zd) for inline " - "(r/w max %d/%d)\n", __func__, size, - rpcx_to_rdmad(xprt).inline_rsize, - rpcx_to_rdmad(xprt).inline_wsize); - size = req->rl_size; - rpc_exit(task, -EIO); /* fail the operation */ - rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; - goto out; - } if (task->tk_flags & RPC_TASK_SWAPPER) nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); else @@ -543,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) req = nreq; } dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); -out: req->rl_connect_cookie = 0; /* our reserved value */ return req->rl_xdr_buf; @@ -579,9 +551,7 @@ xprt_rdma_free(void *buffer) __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); /* - * Finish the deregistration. When using mw bind, this was - * begun in rpcrdma_reply_handler(). In all other modes, we - * do it here, in thread context. The process is considered + * Finish the deregistration. The process is considered * complete when the rr_func vector becomes NULL - this * was put in place during rpcrdma_reply_handler() - the wait * call below will not block if the dereg is "done". If @@ -590,12 +560,7 @@ xprt_rdma_free(void *buffer) for (i = 0; req->rl_nchunks;) { --req->rl_nchunks; i += rpcrdma_deregister_external( - &req->rl_segments[i], r_xprt, NULL); - } - - if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { - rep->rr_func = NULL; /* abandon the callback */ - req->rl_reply = NULL; + &req->rl_segments[i], r_xprt); } if (req->rl_iov.length == 0) { /* see allocate above */ @@ -630,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task) struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int rc; - /* marshal the send itself */ - if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { - r_xprt->rx_stats.failed_marshal_count++; - dprintk("RPC: %s: rpcrdma_marshal_req failed\n", - __func__); - return -EIO; + if (req->rl_niovs == 0) { + rc = rpcrdma_marshal_req(rqst); + if (rc < 0) + goto failed_marshal; } if (req->rl_reply == NULL) /* e.g. reconnection */ @@ -660,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task) rqst->rq_bytes_sent = 0; return 0; +failed_marshal: + r_xprt->rx_stats.failed_marshal_count++; + dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", + __func__, rc); + if (rc == -EIO) + return -EIO; drop_connection: xprt_disconnect_done(xprt); return -ENOTCONN; /* implies disconnect */ @@ -705,7 +675,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) */ static struct rpc_xprt_ops xprt_rdma_procs = { - .reserve_xprt = xprt_rdma_reserve_xprt, + .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ .alloc_slot = xprt_alloc_slot, .release_request = xprt_release_rqst_cong, /* ditto */ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 93726560eaa8..13dbd1c389ff 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -48,8 +48,8 @@ */ #include <linux/interrupt.h> -#include <linux/pci.h> /* for Tavor hack below */ #include <linux/slab.h> +#include <asm/bitops.h> #include "xprt_rdma.h" @@ -142,98 +142,139 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) } } -static inline -void rpcrdma_event_process(struct ib_wc *wc) +static void +rpcrdma_sendcq_process_wc(struct ib_wc *wc) { - struct rpcrdma_mw *frmr; - struct rpcrdma_rep *rep = - (struct rpcrdma_rep *)(unsigned long) wc->wr_id; + struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; - dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", - __func__, rep, wc->status, wc->opcode, wc->byte_len); + dprintk("RPC: %s: frmr %p status %X opcode %d\n", + __func__, frmr, wc->status, wc->opcode); - if (!rep) /* send or bind completion that we don't care about */ + if (wc->wr_id == 0ULL) return; - - if (IB_WC_SUCCESS != wc->status) { - dprintk("RPC: %s: WC opcode %d status %X, connection lost\n", - __func__, wc->opcode, wc->status); - rep->rr_len = ~0U; - if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV) - rpcrdma_schedule_tasklet(rep); + if (wc->status != IB_WC_SUCCESS) return; - } - switch (wc->opcode) { - case IB_WC_FAST_REG_MR: - frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + if (wc->opcode == IB_WC_FAST_REG_MR) frmr->r.frmr.state = FRMR_IS_VALID; - break; - case IB_WC_LOCAL_INV: - frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + else if (wc->opcode == IB_WC_LOCAL_INV) frmr->r.frmr.state = FRMR_IS_INVALID; - break; - case IB_WC_RECV: - rep->rr_len = wc->byte_len; - ib_dma_sync_single_for_cpu( - rdmab_to_ia(rep->rr_buffer)->ri_id->device, - rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - /* Keep (only) the most recent credits, after check validity */ - if (rep->rr_len >= 16) { - struct rpcrdma_msg *p = - (struct rpcrdma_msg *) rep->rr_base; - unsigned int credits = ntohl(p->rm_credit); - if (credits == 0) { - dprintk("RPC: %s: server" - " dropped credits to 0!\n", __func__); - /* don't deadlock */ - credits = 1; - } else if (credits > rep->rr_buffer->rb_max_requests) { - dprintk("RPC: %s: server" - " over-crediting: %d (%d)\n", - __func__, credits, - rep->rr_buffer->rb_max_requests); - credits = rep->rr_buffer->rb_max_requests; - } - atomic_set(&rep->rr_buffer->rb_credits, credits); - } - /* fall through */ - case IB_WC_BIND_MW: - rpcrdma_schedule_tasklet(rep); - break; - default: - dprintk("RPC: %s: unexpected WC event %X\n", - __func__, wc->opcode); - break; - } } -static inline int -rpcrdma_cq_poll(struct ib_cq *cq) +static int +rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) { - struct ib_wc wc; - int rc; + struct ib_wc *wcs; + int budget, count, rc; - for (;;) { - rc = ib_poll_cq(cq, 1, &wc); - if (rc < 0) { - dprintk("RPC: %s: ib_poll_cq failed %i\n", - __func__, rc); + budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; + do { + wcs = ep->rep_send_wcs; + + rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); + if (rc <= 0) return rc; - } - if (rc == 0) - break; - rpcrdma_event_process(&wc); + count = rc; + while (count-- > 0) + rpcrdma_sendcq_process_wc(wcs++); + } while (rc == RPCRDMA_POLLSIZE && --budget); + return 0; +} + +/* + * Handle send, fast_reg_mr, and local_inv completions. + * + * Send events are typically suppressed and thus do not result + * in an upcall. Occasionally one is signaled, however. This + * prevents the provider's completion queue from wrapping and + * losing a completion. + */ +static void +rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) +{ + struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; + int rc; + + rc = rpcrdma_sendcq_poll(cq, ep); + if (rc) { + dprintk("RPC: %s: ib_poll_cq failed: %i\n", + __func__, rc); + return; } + rc = ib_req_notify_cq(cq, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + if (rc == 0) + return; + if (rc < 0) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + return; + } + + rpcrdma_sendcq_poll(cq, ep); +} + +static void +rpcrdma_recvcq_process_wc(struct ib_wc *wc) +{ + struct rpcrdma_rep *rep = + (struct rpcrdma_rep *)(unsigned long)wc->wr_id; + + dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", + __func__, rep, wc->status, wc->opcode, wc->byte_len); + + if (wc->status != IB_WC_SUCCESS) { + rep->rr_len = ~0U; + goto out_schedule; + } + if (wc->opcode != IB_WC_RECV) + return; + + rep->rr_len = wc->byte_len; + ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, + rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); + + if (rep->rr_len >= 16) { + struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base; + unsigned int credits = ntohl(p->rm_credit); + + if (credits == 0) + credits = 1; /* don't deadlock */ + else if (credits > rep->rr_buffer->rb_max_requests) + credits = rep->rr_buffer->rb_max_requests; + atomic_set(&rep->rr_buffer->rb_credits, credits); + } + +out_schedule: + rpcrdma_schedule_tasklet(rep); +} + +static int +rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) +{ + struct ib_wc *wcs; + int budget, count, rc; + + budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; + do { + wcs = ep->rep_recv_wcs; + + rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); + if (rc <= 0) + return rc; + + count = rc; + while (count-- > 0) + rpcrdma_recvcq_process_wc(wcs++); + } while (rc == RPCRDMA_POLLSIZE && --budget); return 0; } /* - * rpcrdma_cq_event_upcall + * Handle receive completions. * - * This upcall handles recv, send, bind and unbind events. * It is reentrant but processes single events in order to maintain * ordering of receives to keep server credits. * @@ -242,26 +283,31 @@ rpcrdma_cq_poll(struct ib_cq *cq) * connection shutdown. That is, the structures required for * the completion of the reply handler must remain intact until * all memory has been reclaimed. - * - * Note that send events are suppressed and do not result in an upcall. */ static void -rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) +rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) { + struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; int rc; - rc = rpcrdma_cq_poll(cq); - if (rc) + rc = rpcrdma_recvcq_poll(cq, ep); + if (rc) { + dprintk("RPC: %s: ib_poll_cq failed: %i\n", + __func__, rc); return; + } - rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - if (rc) { - dprintk("RPC: %s: ib_req_notify_cq failed %i\n", + rc = ib_req_notify_cq(cq, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + if (rc == 0) + return; + if (rc < 0) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); return; } - rpcrdma_cq_poll(cq); + rpcrdma_recvcq_poll(cq, ep); } #ifdef RPC_DEBUG @@ -493,54 +539,32 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; } - switch (memreg) { - case RPCRDMA_MEMWINDOWS: - case RPCRDMA_MEMWINDOWS_ASYNC: - if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { - dprintk("RPC: %s: MEMWINDOWS registration " - "specified but not supported by adapter, " - "using slower RPCRDMA_REGISTER\n", - __func__); - memreg = RPCRDMA_REGISTER; - } - break; - case RPCRDMA_MTHCAFMR: - if (!ia->ri_id->device->alloc_fmr) { -#if RPCRDMA_PERSISTENT_REGISTRATION - dprintk("RPC: %s: MTHCAFMR registration " - "specified but not supported by adapter, " - "using riskier RPCRDMA_ALLPHYSICAL\n", - __func__); - memreg = RPCRDMA_ALLPHYSICAL; -#else - dprintk("RPC: %s: MTHCAFMR registration " - "specified but not supported by adapter, " - "using slower RPCRDMA_REGISTER\n", - __func__); - memreg = RPCRDMA_REGISTER; -#endif - } - break; - case RPCRDMA_FRMR: + if (memreg == RPCRDMA_FRMR) { /* Requires both frmr reg and local dma lkey */ if ((devattr.device_cap_flags & (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { -#if RPCRDMA_PERSISTENT_REGISTRATION dprintk("RPC: %s: FRMR registration " - "specified but not supported by adapter, " - "using riskier RPCRDMA_ALLPHYSICAL\n", - __func__); + "not supported by HCA\n", __func__); + memreg = RPCRDMA_MTHCAFMR; + } else { + /* Mind the ia limit on FRMR page list depth */ + ia->ri_max_frmr_depth = min_t(unsigned int, + RPCRDMA_MAX_DATA_SEGS, + devattr.max_fast_reg_page_list_len); + } + } + if (memreg == RPCRDMA_MTHCAFMR) { + if (!ia->ri_id->device->alloc_fmr) { + dprintk("RPC: %s: MTHCAFMR registration " + "not supported by HCA\n", __func__); +#if RPCRDMA_PERSISTENT_REGISTRATION memreg = RPCRDMA_ALLPHYSICAL; #else - dprintk("RPC: %s: FRMR registration " - "specified but not supported by adapter, " - "using slower RPCRDMA_REGISTER\n", - __func__); - memreg = RPCRDMA_REGISTER; + rc = -ENOMEM; + goto out2; #endif } - break; } /* @@ -552,8 +576,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) * adapter. */ switch (memreg) { - case RPCRDMA_BOUNCEBUFFERS: - case RPCRDMA_REGISTER: case RPCRDMA_FRMR: break; #if RPCRDMA_PERSISTENT_REGISTRATION @@ -563,30 +585,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) IB_ACCESS_REMOTE_READ; goto register_setup; #endif - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - mem_priv = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_MW_BIND; - goto register_setup; case RPCRDMA_MTHCAFMR: if (ia->ri_have_dma_lkey) break; mem_priv = IB_ACCESS_LOCAL_WRITE; +#if RPCRDMA_PERSISTENT_REGISTRATION register_setup: +#endif ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); if (IS_ERR(ia->ri_bind_mem)) { printk(KERN_ALERT "%s: ib_get_dma_mr for " - "phys register failed with %lX\n\t" - "Will continue with degraded performance\n", + "phys register failed with %lX\n", __func__, PTR_ERR(ia->ri_bind_mem)); - memreg = RPCRDMA_REGISTER; - ia->ri_bind_mem = NULL; + rc = -ENOMEM; + goto out2; } break; default: - printk(KERN_ERR "%s: invalid memory registration mode %d\n", - __func__, memreg); - rc = -EINVAL; + printk(KERN_ERR "RPC: Unsupported memory " + "registration mode: %d\n", memreg); + rc = -ENOMEM; goto out2; } dprintk("RPC: %s: memory registration strategy is %d\n", @@ -640,6 +658,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr devattr; + struct ib_cq *sendcq, *recvcq; int rc, err; rc = ib_query_device(ia->ri_id->device, &devattr); @@ -659,32 +678,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: + case RPCRDMA_FRMR: { + int depth = 7; + /* Add room for frmr register and invalidate WRs. * 1. FRMR reg WR for head * 2. FRMR invalidate WR for head - * 3. FRMR reg WR for pagelist - * 4. FRMR invalidate WR for pagelist + * 3. N FRMR reg WRs for pagelist + * 4. N FRMR invalidate WRs for pagelist * 5. FRMR reg WR for tail * 6. FRMR invalidate WR for tail * 7. The RDMA_SEND WR */ - ep->rep_attr.cap.max_send_wr *= 7; + + /* Calculate N if the device max FRMR depth is smaller than + * RPCRDMA_MAX_DATA_SEGS. + */ + if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { + int delta = RPCRDMA_MAX_DATA_SEGS - + ia->ri_max_frmr_depth; + + do { + depth += 2; /* FRMR reg + invalidate */ + delta -= ia->ri_max_frmr_depth; + } while (delta > 0); + + } + ep->rep_attr.cap.max_send_wr *= depth; if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { - cdata->max_requests = devattr.max_qp_wr / 7; + cdata->max_requests = devattr.max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; - ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * + depth; } break; - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - /* Add room for mw_binds+unbinds - overkill! */ - ep->rep_attr.cap.max_send_wr++; - ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) - return -EINVAL; - break; + } default: break; } @@ -705,46 +734,51 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_sge); /* set trigger for requesting send completion */ - ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; - switch (ia->ri_memreg_strategy) { - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - ep->rep_cqinit -= RPCRDMA_MAX_SEGS; - break; - default: - break; - } + ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); ep->rep_ia = ia; init_waitqueue_head(&ep->rep_connect_wait); + INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); - /* - * Create a single cq for receive dto and mw_bind (only ever - * care about unbind, really). Send completions are suppressed. - * Use single threaded tasklet upcalls to maintain ordering. - */ - ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, - rpcrdma_cq_async_error_upcall, NULL, - ep->rep_attr.cap.max_recv_wr + + sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, + rpcrdma_cq_async_error_upcall, ep, ep->rep_attr.cap.max_send_wr + 1, 0); - if (IS_ERR(ep->rep_cq)) { - rc = PTR_ERR(ep->rep_cq); - dprintk("RPC: %s: ib_create_cq failed: %i\n", + if (IS_ERR(sendcq)) { + rc = PTR_ERR(sendcq); + dprintk("RPC: %s: failed to create send CQ: %i\n", __func__, rc); goto out1; } - rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); + rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); + if (rc) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + goto out2; + } + + recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, + rpcrdma_cq_async_error_upcall, ep, + ep->rep_attr.cap.max_recv_wr + 1, 0); + if (IS_ERR(recvcq)) { + rc = PTR_ERR(recvcq); + dprintk("RPC: %s: failed to create recv CQ: %i\n", + __func__, rc); + goto out2; + } + + rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); if (rc) { dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", __func__, rc); + ib_destroy_cq(recvcq); goto out2; } - ep->rep_attr.send_cq = ep->rep_cq; - ep->rep_attr.recv_cq = ep->rep_cq; + ep->rep_attr.send_cq = sendcq; + ep->rep_attr.recv_cq = recvcq; /* Initialize cma parameters */ @@ -754,9 +788,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; - if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) - ep->rep_remote_cma.responder_resources = 0; - else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ + if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; @@ -768,7 +800,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, return 0; out2: - err = ib_destroy_cq(ep->rep_cq); + err = ib_destroy_cq(sendcq); if (err) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); @@ -782,11 +814,8 @@ out1: * Disconnect and destroy endpoint. After this, the only * valid operations on the ep are to free it (if dynamically * allocated) or re-create it. - * - * The caller's error handling must be sure to not leak the endpoint - * if this function fails. */ -int +void rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { int rc; @@ -794,6 +823,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) dprintk("RPC: %s: entering, connected is %d\n", __func__, ep->rep_connected); + cancel_delayed_work_sync(&ep->rep_connect_worker); + if (ia->ri_id->qp) { rc = rpcrdma_ep_disconnect(ep, ia); if (rc) @@ -809,13 +840,17 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ep->rep_pad_mr = NULL; } - rpcrdma_clean_cq(ep->rep_cq); - rc = ib_destroy_cq(ep->rep_cq); + rpcrdma_clean_cq(ep->rep_attr.recv_cq); + rc = ib_destroy_cq(ep->rep_attr.recv_cq); if (rc) dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, rc); - return rc; + rpcrdma_clean_cq(ep->rep_attr.send_cq); + rc = ib_destroy_cq(ep->rep_attr.send_cq); + if (rc) + dprintk("RPC: %s: ib_destroy_cq returned %i\n", + __func__, rc); } /* @@ -831,17 +866,20 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) if (ep->rep_connected != 0) { struct rpcrdma_xprt *xprt; retry: + dprintk("RPC: %s: reconnecting...\n", __func__); rc = rpcrdma_ep_disconnect(ep, ia); if (rc && rc != -ENOTCONN) dprintk("RPC: %s: rpcrdma_ep_disconnect" " status %i\n", __func__, rc); - rpcrdma_clean_cq(ep->rep_cq); + + rpcrdma_clean_cq(ep->rep_attr.recv_cq); + rpcrdma_clean_cq(ep->rep_attr.send_cq); xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); id = rpcrdma_create_id(xprt, ia, (struct sockaddr *)&xprt->rx_data.addr); if (IS_ERR(id)) { - rc = PTR_ERR(id); + rc = -EHOSTUNREACH; goto out; } /* TEMP TEMP TEMP - fail if new device: @@ -855,35 +893,32 @@ retry: printk("RPC: %s: can't reconnect on " "different device!\n", __func__); rdma_destroy_id(id); - rc = -ENETDOWN; + rc = -ENETUNREACH; goto out; } /* END TEMP */ + rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); + if (rc) { + dprintk("RPC: %s: rdma_create_qp failed %i\n", + __func__, rc); + rdma_destroy_id(id); + rc = -ENETUNREACH; + goto out; + } rdma_destroy_qp(ia->ri_id); rdma_destroy_id(ia->ri_id); ia->ri_id = id; + } else { + dprintk("RPC: %s: connecting...\n", __func__); + rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); + if (rc) { + dprintk("RPC: %s: rdma_create_qp failed %i\n", + __func__, rc); + /* do not update ep->rep_connected */ + return -ENETUNREACH; + } } - rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); - if (rc) { - dprintk("RPC: %s: rdma_create_qp failed %i\n", - __func__, rc); - goto out; - } - -/* XXX Tavor device performs badly with 2K MTU! */ -if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { - struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); - if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && - (pcid->vendor == PCI_VENDOR_ID_MELLANOX || - pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { - struct ib_qp_attr attr = { - .path_mtu = IB_MTU_1024 - }; - rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); - } -} - ep->rep_connected = 0; rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); @@ -944,7 +979,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { int rc; - rpcrdma_clean_cq(ep->rep_cq); + rpcrdma_clean_cq(ep->rep_attr.recv_cq); + rpcrdma_clean_cq(ep->rep_attr.send_cq); rc = rdma_disconnect(ia->ri_id); if (!rc) { /* returns without wait if not connected */ @@ -967,7 +1003,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { char *p; - size_t len; + size_t len, rlen, wlen; int i, rc; struct rpcrdma_mw *r; @@ -997,11 +1033,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * sizeof(struct rpcrdma_mw); break; - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * - sizeof(struct rpcrdma_mw); - break; default: break; } @@ -1032,32 +1063,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, } p += cdata->padding; - /* - * Allocate the fmr's, or mw's for mw_bind chunk registration. - * We "cycle" the mw's in order to minimize rkey reuse, - * and also reduce unbind-to-bind collision. - */ INIT_LIST_HEAD(&buf->rb_mws); r = (struct rpcrdma_mw *)p; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, - RPCRDMA_MAX_SEGS); + ia->ri_max_frmr_depth); if (IS_ERR(r->r.frmr.fr_mr)) { rc = PTR_ERR(r->r.frmr.fr_mr); dprintk("RPC: %s: ib_alloc_fast_reg_mr" " failed %i\n", __func__, rc); goto out; } - r->r.frmr.fr_pgl = - ib_alloc_fast_reg_page_list(ia->ri_id->device, - RPCRDMA_MAX_SEGS); + r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( + ia->ri_id->device, + ia->ri_max_frmr_depth); if (IS_ERR(r->r.frmr.fr_pgl)) { rc = PTR_ERR(r->r.frmr.fr_pgl); dprintk("RPC: %s: " "ib_alloc_fast_reg_page_list " "failed %i\n", __func__, rc); + + ib_dereg_mr(r->r.frmr.fr_mr); goto out; } list_add(&r->mw_list, &buf->rb_mws); @@ -1082,21 +1110,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, ++r; } break; - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - /* Allocate one extra request's worth, for full cycling */ - for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { - r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1); - if (IS_ERR(r->r.mw)) { - rc = PTR_ERR(r->r.mw); - dprintk("RPC: %s: ib_alloc_mw" - " failed %i\n", __func__, rc); - goto out; - } - list_add(&r->mw_list, &buf->rb_mws); - ++r; - } - break; default: break; } @@ -1105,16 +1118,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, * Allocate/init the request/reply buffers. Doing this * using kmalloc for now -- one for each buf. */ + wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req)); + rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep)); + dprintk("RPC: %s: wlen = %zu, rlen = %zu\n", + __func__, wlen, rlen); + for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; struct rpcrdma_rep *rep; - len = cdata->inline_wsize + sizeof(struct rpcrdma_req); - /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */ - /* Typical ~2400b, so rounding up saves work later */ - if (len < 4096) - len = 4096; - req = kmalloc(len, GFP_KERNEL); + req = kmalloc(wlen, GFP_KERNEL); if (req == NULL) { dprintk("RPC: %s: request buffer %d alloc" " failed\n", __func__, i); @@ -1126,16 +1139,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, buf->rb_send_bufs[i]->rl_buffer = buf; rc = rpcrdma_register_internal(ia, req->rl_base, - len - offsetof(struct rpcrdma_req, rl_base), + wlen - offsetof(struct rpcrdma_req, rl_base), &buf->rb_send_bufs[i]->rl_handle, &buf->rb_send_bufs[i]->rl_iov); if (rc) goto out; - buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); + buf->rb_send_bufs[i]->rl_size = wlen - + sizeof(struct rpcrdma_req); - len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); - rep = kmalloc(len, GFP_KERNEL); + rep = kmalloc(rlen, GFP_KERNEL); if (rep == NULL) { dprintk("RPC: %s: reply buffer %d alloc failed\n", __func__, i); @@ -1145,10 +1158,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, memset(rep, 0, sizeof(struct rpcrdma_rep)); buf->rb_recv_bufs[i] = rep; buf->rb_recv_bufs[i]->rr_buffer = buf; - init_waitqueue_head(&rep->rr_unbind); rc = rpcrdma_register_internal(ia, rep->rr_base, - len - offsetof(struct rpcrdma_rep, rr_base), + rlen - offsetof(struct rpcrdma_rep, rr_base), &buf->rb_recv_bufs[i]->rr_handle, &buf->rb_recv_bufs[i]->rr_iov); if (rc) @@ -1179,7 +1191,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) /* clean up in reverse order from create * 1. recv mr memory (mr free, then kfree) - * 1a. bind mw memory * 2. send mr memory (mr free, then kfree) * 3. padding (if any) [moved to rpcrdma_ep_destroy] * 4. arrays @@ -1194,41 +1205,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_recv_bufs[i]); } if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { - while (!list_empty(&buf->rb_mws)) { - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - rc = ib_dereg_mr(r->r.frmr.fr_mr); - if (rc) - dprintk("RPC: %s:" - " ib_dereg_mr" - " failed %i\n", - __func__, rc); - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); - break; - case RPCRDMA_MTHCAFMR: - rc = ib_dealloc_fmr(r->r.fmr); - if (rc) - dprintk("RPC: %s:" - " ib_dealloc_fmr" - " failed %i\n", - __func__, rc); - break; - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - rc = ib_dealloc_mw(r->r.mw); - if (rc) - dprintk("RPC: %s:" - " ib_dealloc_mw" - " failed %i\n", - __func__, rc); - break; - default: - break; - } - } rpcrdma_deregister_internal(ia, buf->rb_send_bufs[i]->rl_handle, &buf->rb_send_bufs[i]->rl_iov); @@ -1236,6 +1212,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) } } + while (!list_empty(&buf->rb_mws)) { + r = list_entry(buf->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + rc = ib_dereg_mr(r->r.frmr.fr_mr); + if (rc) + dprintk("RPC: %s:" + " ib_dereg_mr" + " failed %i\n", + __func__, rc); + ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + break; + case RPCRDMA_MTHCAFMR: + rc = ib_dealloc_fmr(r->r.fmr); + if (rc) + dprintk("RPC: %s:" + " ib_dealloc_fmr" + " failed %i\n", + __func__, rc); + break; + default: + break; + } + } + kfree(buf->rb_pool); } @@ -1299,21 +1302,17 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) int i; unsigned long flags; - BUG_ON(req->rl_nchunks != 0); spin_lock_irqsave(&buffers->rb_lock, flags); buffers->rb_send_bufs[--buffers->rb_send_index] = req; req->rl_niovs = 0; if (req->rl_reply) { buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; - init_waitqueue_head(&req->rl_reply->rr_unbind); req->rl_reply->rr_func = NULL; req->rl_reply = NULL; } switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: case RPCRDMA_MTHCAFMR: - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: /* * Cycle mw's back in reverse order, and "spin" them. * This delays and scrambles reuse as much as possible. @@ -1358,8 +1357,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) /* * Put reply buffers back into pool when not attached to - * request. This happens in error conditions, and when - * aborting unbinds. Pre-decrement counter/array index. + * request. This happens in error conditions. */ void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) @@ -1498,8 +1496,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, seg1->mr_offset -= pageoff; /* start of page */ seg1->mr_len += pageoff; len = -pageoff; - if (*nsegs > RPCRDMA_MAX_DATA_SEGS) - *nsegs = RPCRDMA_MAX_DATA_SEGS; + if (*nsegs > ia->ri_max_frmr_depth) + *nsegs = ia->ri_max_frmr_depth; for (page_no = i = 0; i < *nsegs;) { rpcrdma_map_one(ia, seg, writing); pa = seg->mr_dma; @@ -1536,10 +1534,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, } else post_wr = &frmr_wr; - /* Bump the key */ - key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); - ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); - /* Prepare FRMR WR */ memset(&frmr_wr, 0, sizeof frmr_wr); frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; @@ -1550,7 +1544,16 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.wr.fast_reg.page_list_len = page_no; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; - BUG_ON(frmr_wr.wr.fast_reg.length < len); + if (frmr_wr.wr.fast_reg.length < len) { + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + return -EIO; + } + + /* Bump the key */ + key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); + frmr_wr.wr.fast_reg.access_flags = (writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ); @@ -1661,135 +1664,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, return rc; } -static int -rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, - int *nsegs, int writing, struct rpcrdma_ia *ia, - struct rpcrdma_xprt *r_xprt) -{ - int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : - IB_ACCESS_REMOTE_READ); - struct ib_mw_bind param; - int rc; - - *nsegs = 1; - rpcrdma_map_one(ia, seg, writing); - param.bind_info.mr = ia->ri_bind_mem; - param.wr_id = 0ULL; /* no send cookie */ - param.bind_info.addr = seg->mr_dma; - param.bind_info.length = seg->mr_len; - param.send_flags = 0; - param.bind_info.mw_access_flags = mem_priv; - - DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); - if (rc) { - dprintk("RPC: %s: failed ib_bind_mw " - "%u@0x%llx status %i\n", - __func__, seg->mr_len, - (unsigned long long)seg->mr_dma, rc); - rpcrdma_unmap_one(ia, seg); - } else { - seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; - seg->mr_base = param.bind_info.addr; - seg->mr_nsegs = 1; - } - return rc; -} - -static int -rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, - struct rpcrdma_ia *ia, - struct rpcrdma_xprt *r_xprt, void **r) -{ - struct ib_mw_bind param; - LIST_HEAD(l); - int rc; - - BUG_ON(seg->mr_nsegs != 1); - param.bind_info.mr = ia->ri_bind_mem; - param.bind_info.addr = 0ULL; /* unbind */ - param.bind_info.length = 0; - param.bind_info.mw_access_flags = 0; - if (*r) { - param.wr_id = (u64) (unsigned long) *r; - param.send_flags = IB_SEND_SIGNALED; - INIT_CQCOUNT(&r_xprt->rx_ep); - } else { - param.wr_id = 0ULL; - param.send_flags = 0; - DECR_CQCOUNT(&r_xprt->rx_ep); - } - rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); - rpcrdma_unmap_one(ia, seg); - if (rc) - dprintk("RPC: %s: failed ib_(un)bind_mw," - " status %i\n", __func__, rc); - else - *r = NULL; /* will upcall on completion */ - return rc; -} - -static int -rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, - int *nsegs, int writing, struct rpcrdma_ia *ia) -{ - int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : - IB_ACCESS_REMOTE_READ); - struct rpcrdma_mr_seg *seg1 = seg; - struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; - int len, i, rc = 0; - - if (*nsegs > RPCRDMA_MAX_DATA_SEGS) - *nsegs = RPCRDMA_MAX_DATA_SEGS; - for (len = 0, i = 0; i < *nsegs;) { - rpcrdma_map_one(ia, seg, writing); - ipb[i].addr = seg->mr_dma; - ipb[i].size = seg->mr_len; - len += seg->mr_len; - ++seg; - ++i; - /* Check for holes */ - if ((i < *nsegs && offset_in_page(seg->mr_offset)) || - offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) - break; - } - seg1->mr_base = seg1->mr_dma; - seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, - ipb, i, mem_priv, &seg1->mr_base); - if (IS_ERR(seg1->mr_chunk.rl_mr)) { - rc = PTR_ERR(seg1->mr_chunk.rl_mr); - dprintk("RPC: %s: failed ib_reg_phys_mr " - "%u@0x%llx (%d)... status %i\n", - __func__, len, - (unsigned long long)seg1->mr_dma, i, rc); - while (i--) - rpcrdma_unmap_one(ia, --seg); - } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; - seg1->mr_nsegs = i; - seg1->mr_len = len; - } - *nsegs = i; - return rc; -} - -static int -rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, - struct rpcrdma_ia *ia) -{ - struct rpcrdma_mr_seg *seg1 = seg; - int rc; - - rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); - seg1->mr_chunk.rl_mr = NULL; - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - if (rc) - dprintk("RPC: %s: failed ib_dereg_mr," - " status %i\n", __func__, rc); - return rc; -} - int rpcrdma_register_external(struct rpcrdma_mr_seg *seg, int nsegs, int writing, struct rpcrdma_xprt *r_xprt) @@ -1819,16 +1693,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); break; - /* Registration using memory windows */ - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); - break; - - /* Default registration each time */ default: - rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); - break; + return -1; } if (rc) return -1; @@ -1838,7 +1704,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, int rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, - struct rpcrdma_xprt *r_xprt, void *r) + struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; int nsegs = seg->mr_nsegs, rc; @@ -1847,9 +1713,7 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, #if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: - BUG_ON(nsegs != 1); rpcrdma_unmap_one(ia, seg); - rc = 0; break; #endif @@ -1861,21 +1725,9 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, rc = rpcrdma_deregister_fmr_external(seg, ia); break; - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); - break; - default: - rc = rpcrdma_deregister_default_external(seg, ia); break; } - if (r) { - struct rpcrdma_rep *rep = r; - void (*func)(struct rpcrdma_rep *) = rep->rr_func; - rep->rr_func = NULL; - func(rep); /* dereg done, callback now */ - } return nsegs; } @@ -1950,7 +1802,6 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, ib_dma_sync_single_for_cpu(ia->ri_id->device, rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); - DECR_CQCOUNT(ep); rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); if (rc) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cc1445dc1d1a..89e7cd479705 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -43,6 +43,7 @@ #include <linux/wait.h> /* wait_queue_head_t, etc */ #include <linux/spinlock.h> /* spinlock_t, etc */ #include <linux/atomic.h> /* atomic_t, etc */ +#include <linux/workqueue.h> /* struct work_struct */ #include <rdma/rdma_cm.h> /* RDMA connection api */ #include <rdma/ib_verbs.h> /* RDMA verbs api */ @@ -66,18 +67,21 @@ struct rpcrdma_ia { struct completion ri_done; int ri_async_rc; enum rpcrdma_memreg ri_memreg_strategy; + unsigned int ri_max_frmr_depth; }; /* * RDMA Endpoint -- one per transport instance */ +#define RPCRDMA_WC_BUDGET (128) +#define RPCRDMA_POLLSIZE (16) + struct rpcrdma_ep { atomic_t rep_cqcount; int rep_cqinit; int rep_connected; struct rpcrdma_ia *rep_ia; - struct ib_cq *rep_cq; struct ib_qp_init_attr rep_attr; wait_queue_head_t rep_connect_wait; struct ib_sge rep_pad; /* holds zeroed pad */ @@ -86,6 +90,9 @@ struct rpcrdma_ep { struct rpc_xprt *rep_xprt; /* for rep_func */ struct rdma_conn_param rep_remote_cma; struct sockaddr_storage rep_remote_addr; + struct delayed_work rep_connect_worker; + struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE]; + struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; }; #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) @@ -124,7 +131,6 @@ struct rpcrdma_rep { struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ struct list_head rr_list; /* tasklet list */ - wait_queue_head_t rr_unbind; /* optional unbind wait */ struct ib_sge rr_iov; /* for posting */ struct ib_mr *rr_handle; /* handle for mem in rr_iov */ char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ @@ -159,7 +165,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ struct ib_mr *rl_mr; /* if registered directly */ struct rpcrdma_mw { /* if registered from region */ union { - struct ib_mw *mw; struct ib_fmr *fmr; struct { struct ib_fast_reg_page_list *fr_pgl; @@ -207,7 +212,6 @@ struct rpcrdma_req { struct rpcrdma_buffer { spinlock_t rb_lock; /* protects indexes */ atomic_t rb_credits; /* most recent server credits */ - unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ int rb_max_requests;/* client max requests */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ int rb_send_index; @@ -300,7 +304,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); */ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, struct rpcrdma_create_data_internal *); -int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); +void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); @@ -330,11 +334,12 @@ int rpcrdma_deregister_internal(struct rpcrdma_ia *, int rpcrdma_register_external(struct rpcrdma_mr_seg *, int, int, struct rpcrdma_xprt *); int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, - struct rpcrdma_xprt *, void *); + struct rpcrdma_xprt *); /* * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c */ +void rpcrdma_connect_worker(struct work_struct *); void rpcrdma_conn_func(struct rpcrdma_ep *); void rpcrdma_reply_handler(struct rpcrdma_rep *); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 25a3dcf15cae..be8bbd5d65ec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -866,8 +866,6 @@ static void xs_reset_transport(struct sock_xprt *transport) xs_restore_old_callbacks(transport, sk); write_unlock_bh(&sk->sk_callback_lock); - sk->sk_no_check = 0; - trace_rpc_socket_close(&transport->xprt, sock); sock_release(sock); } @@ -893,11 +891,11 @@ static void xs_close(struct rpc_xprt *xprt) xs_reset_transport(transport); xprt->reestablish_timeout = 0; - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); xprt_disconnect_done(xprt); } @@ -1497,12 +1495,12 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); } static void xs_sock_mark_closed(struct rpc_xprt *xprt) @@ -1556,10 +1554,10 @@ static void xs_tcp_state_change(struct sock *sk) xprt->connect_cookie++; xprt->reestablish_timeout = 0; set_bit(XPRT_CLOSING, &xprt->state); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); break; case TCP_CLOSE_WAIT: @@ -1578,9 +1576,9 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_LAST_ACK: set_bit(XPRT_CLOSING, &xprt->state); xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(XPRT_CONNECTED, &xprt->state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); break; case TCP_CLOSE: xs_tcp_cancel_linger_timeout(xprt); @@ -2046,7 +2044,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; - sk->sk_no_check = UDP_CSUM_NORCV; sk->sk_allocation = GFP_ATOMIC; xprt_set_connected(xprt); diff --git a/net/tipc/Makefile b/net/tipc/Makefile index b282f7130d2b..a080c66d819a 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_TIPC) := tipc.o tipc-y += addr.o bcast.o bearer.o config.o \ - core.o handler.o link.o discover.o msg.o \ + core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ socket.o log.o eth_media.o server.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 95ab5ef92920..dd13bfa09333 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, Ericsson AB + * Copyright (c) 2004-2006, 2014, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -38,6 +38,8 @@ #include "core.h" #include "link.h" #include "port.h" +#include "socket.h" +#include "msg.h" #include "bcast.h" #include "name_distr.h" @@ -71,7 +73,7 @@ struct tipc_bcbearer_pair { * Note: The fields labelled "temporary" are incorporated into the bearer * to avoid consuming potentially limited stack space through the use of * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bc_lock". + * prevented through use of the spinlock "bclink_lock". */ struct tipc_bcbearer { struct tipc_bearer bearer; @@ -84,34 +86,69 @@ struct tipc_bcbearer { /** * struct tipc_bclink - link used for broadcast messages + * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node + * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * * Handles sequence numbering, fragmentation, bundling, etc. */ struct tipc_bclink { + spinlock_t lock; struct tipc_link link; struct tipc_node node; + unsigned int flags; struct tipc_node_map bcast_nodes; struct tipc_node *retransmit_to; }; -static struct tipc_bcbearer bcast_bearer; -static struct tipc_bclink bcast_link; - -static struct tipc_bcbearer *bcbearer = &bcast_bearer; -static struct tipc_bclink *bclink = &bcast_link; -static struct tipc_link *bcl = &bcast_link.link; - -static DEFINE_SPINLOCK(bc_lock); +static struct tipc_bcbearer *bcbearer; +static struct tipc_bclink *bclink; +static struct tipc_link *bcl; const char tipc_bclink_name[] = "broadcast-link"; static void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, struct tipc_node_map *nm_diff); +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); + +static void tipc_bclink_lock(void) +{ + spin_lock_bh(&bclink->lock); +} + +static void tipc_bclink_unlock(void) +{ + struct tipc_node *node = NULL; + + if (likely(!bclink->flags)) { + spin_unlock_bh(&bclink->lock); + return; + } + + if (bclink->flags & TIPC_BCLINK_RESET) { + bclink->flags &= ~TIPC_BCLINK_RESET; + node = tipc_bclink_retransmit_to(); + } + spin_unlock_bh(&bclink->lock); + + if (node) + tipc_link_reset_all(node); +} + +uint tipc_bclink_get_mtu(void) +{ + return MAX_PKT_DEFAULT_MCAST; +} + +void tipc_bclink_set_flags(unsigned int flags) +{ + bclink->flags |= flags; +} static u32 bcbuf_acks(struct sk_buff *buf) { @@ -130,16 +167,16 @@ static void bcbuf_decr_acks(struct sk_buff *buf) void tipc_bclink_add_node(u32 addr) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_nmap_add(&bclink->bcast_nodes, addr); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } void tipc_bclink_remove_node(u32 addr) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_nmap_remove(&bclink->bcast_nodes, addr); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } static void bclink_set_last_sent(void) @@ -165,7 +202,7 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * - * Called with bc_lock locked + * Called with bclink_lock locked */ struct tipc_node *tipc_bclink_retransmit_to(void) { @@ -177,7 +214,7 @@ struct tipc_node *tipc_bclink_retransmit_to(void) * @after: sequence number of last packet to *not* retransmit * @to: sequence number of last packet to retransmit * - * Called with bc_lock locked + * Called with bclink_lock locked */ static void bclink_retransmit_pkt(u32 after, u32 to) { @@ -194,7 +231,7 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * @n_ptr: node that sent acknowledgement info * @acked: broadcast sequence # that has been acknowledged * - * Node is locked, bc_lock unlocked. + * Node is locked, bclink_lock unlocked. */ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) { @@ -202,8 +239,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *next; unsigned int released = 0; - spin_lock_bh(&bc_lock); - + tipc_bclink_lock(); /* Bail out if tx queue is empty (no clean up is required) */ crs = bcl->first_out; if (!crs) @@ -267,13 +303,13 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) if (unlikely(released && !list_empty(&bcl->waiting_ports))) tipc_link_wakeup_ports(bcl, 0); exit: - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } /** * tipc_bclink_update_link_state - update broadcast link state * - * tipc_net_lock and node lock set + * RCU and node lock set */ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) { @@ -320,10 +356,10 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) ? buf_seqno(n_ptr->bclink.deferred_head) - 1 : n_ptr->bclink.last_sent); - spin_lock_bh(&bc_lock); - tipc_bearer_send(&bcbearer->bearer, buf, NULL); + tipc_bclink_lock(); + tipc_bearer_send(MAX_BEARERS, buf, NULL); bcl->stats.sent_nacks++; - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); kfree_skb(buf); n_ptr->bclink.oos_state++; @@ -335,8 +371,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) * * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for. - * - * Only tipc_net_lock set. */ static void bclink_peek_nack(struct tipc_msg *msg) { @@ -355,36 +389,56 @@ static void bclink_peek_nack(struct tipc_msg *msg) tipc_node_unlock(n_ptr); } -/* - * tipc_bclink_xmit - broadcast a packet to all nodes in cluster +/* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster + * and to identified node local sockets + * @buf: chain of buffers containing message + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ int tipc_bclink_xmit(struct sk_buff *buf) { - int res; - - spin_lock_bh(&bc_lock); - - if (!bclink->bcast_nodes.count) { - res = msg_data_sz(buf_msg(buf)); - kfree_skb(buf); - goto exit; + int rc = 0; + int bc = 0; + struct sk_buff *clbuf; + + /* Prepare clone of message for local node */ + clbuf = tipc_msg_reassemble(buf); + if (unlikely(!clbuf)) { + kfree_skb_list(buf); + return -EHOSTUNREACH; } - res = __tipc_link_xmit(bcl, buf); - if (likely(res >= 0)) { - bclink_set_last_sent(); - bcl->stats.queue_sz_counts++; - bcl->stats.accu_queue_sz += bcl->out_queue_size; + /* Broadcast to all other nodes */ + if (likely(bclink)) { + tipc_bclink_lock(); + if (likely(bclink->bcast_nodes.count)) { + rc = __tipc_link_xmit(bcl, buf); + if (likely(!rc)) { + bclink_set_last_sent(); + bcl->stats.queue_sz_counts++; + bcl->stats.accu_queue_sz += bcl->out_queue_size; + } + bc = 1; + } + tipc_bclink_unlock(); } -exit: - spin_unlock_bh(&bc_lock); - return res; + + if (unlikely(!bc)) + kfree_skb_list(buf); + + /* Deliver message clone */ + if (likely(!rc)) + tipc_sk_mcast_rcv(clbuf); + else + kfree_skb(clbuf); + + return rc; } /** * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet * - * Called with both sending node's lock and bc_lock taken. + * Called with both sending node's lock and bclink_lock taken. */ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) { @@ -408,7 +462,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) /** * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards * - * tipc_net_lock is read_locked, no other locks set + * RCU is locked, no other locks set */ void tipc_bclink_rcv(struct sk_buff *buf) { @@ -416,7 +470,7 @@ void tipc_bclink_rcv(struct sk_buff *buf) struct tipc_node *node; u32 next_in; u32 seqno; - int deferred; + int deferred = 0; /* Screen out unwanted broadcast messages */ @@ -439,12 +493,12 @@ void tipc_bclink_rcv(struct sk_buff *buf) if (msg_destnode(msg) == tipc_own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bcl->stats.recv_nacks++; bclink->retransmit_to = node; bclink_retransmit_pkt(msg_bcgap_after(msg), msg_bcgap_to(msg)); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } else { tipc_node_unlock(node); bclink_peek_nack(msg); @@ -462,51 +516,47 @@ receive: /* Deliver message to destination */ if (likely(msg_isdata(msg))) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); if (likely(msg_mcast(msg))) - tipc_port_mcast_rcv(buf, NULL); + tipc_sk_mcast_rcv(buf); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); tipc_link_bundle_rcv(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { - int ret; - ret = tipc_link_frag_rcv(&node->bclink.reasm_head, - &node->bclink.reasm_tail, - &buf); - if (ret == LINK_REASM_ERROR) + tipc_buf_append(&node->bclink.reasm_buf, &buf); + if (unlikely(!buf && !node->bclink.reasm_buf)) goto unlock; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; - if (ret == LINK_REASM_COMPLETE) { + if (buf) { bcl->stats.recv_fragmented++; - /* Point msg to inner header */ msg = buf_msg(buf); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); goto receive; } - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); tipc_named_rcv(buf); } else { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); bclink_accept_pkt(node, seqno); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); tipc_node_unlock(node); kfree_skb(buf); } @@ -536,6 +586,7 @@ receive: buf = node->bclink.deferred_head; node->bclink.deferred_head = buf->next; + buf->next = NULL; node->bclink.deferred_size--; goto receive; } @@ -549,17 +600,16 @@ receive: node->bclink.deferred_size += deferred; bclink_update_last_sent(node, seqno); buf = NULL; - } else - deferred = 0; + } - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); if (deferred) bcl->stats.deferred_recv++; else bcl->stats.duplicates++; - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); unlock: tipc_node_unlock(node); @@ -587,6 +637,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; + struct tipc_msg *msg = buf_msg(buf); /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -594,10 +645,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, * since they are sent in an unreliable manner and don't need it */ if (likely(!msg_non_seq(buf_msg(buf)))) { - struct tipc_msg *msg; - bcbuf_set_acks(buf, bclink->bcast_nodes.count); - msg = buf_msg(buf); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); bcl->stats.sent_info++; @@ -614,12 +662,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; - struct tipc_bearer *b = p; + struct tipc_bearer *bp[2] = {p, s}; + struct tipc_bearer *b = bp[msg_link_selector(msg)]; struct sk_buff *tbuf; if (!p) break; /* No more bearers to try */ - + if (!b) + b = p; tipc_nmap_diff(&bcbearer->remains, &b->nodes, &bcbearer->remains_new); if (bcbearer->remains_new.count == bcbearer->remains.count) @@ -627,22 +677,15 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b, buf, &b->bcast_addr); + tipc_bearer_send(b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ - tbuf = pskb_copy(buf, GFP_ATOMIC); + tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b, tbuf, &b->bcast_addr); + tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } - - /* Swap bearers for next packet */ - if (s) { - bcbearer->bpairs[bp_index].primary = s; - bcbearer->bpairs[bp_index].secondary = p; - } - if (bcbearer->remains_new.count == 0) break; /* All targets reached */ @@ -655,20 +698,27 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void tipc_bcbearer_sort(void) +void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) { struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; + struct tipc_bearer *b; int b_index; int pri; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); + + if (action) + tipc_nmap_add(nm_ptr, node); + else + tipc_nmap_remove(nm_ptr, node); /* Group bearers by priority (can assume max of two per priority) */ memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - struct tipc_bearer *b = bearer_list[b_index]; + b = rcu_dereference_rtnl(bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -677,6 +727,7 @@ void tipc_bcbearer_sort(void) else bp_temp[b->priority].secondary = b; } + rcu_read_unlock(); /* Create array of bearer pairs for broadcasting */ bp_curr = bcbearer->bpairs; @@ -702,7 +753,7 @@ void tipc_bcbearer_sort(void) bp_curr++; } - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); } @@ -714,7 +765,7 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) if (!bcl) return 0; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); s = &bcl->stats; @@ -743,7 +794,7 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) s->queue_sz_counts ? (s->accu_queue_sz / s->queue_sz_counts) : 0); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return ret; } @@ -752,9 +803,9 @@ int tipc_bclink_reset_stats(void) if (!bcl) return -ENOPROTOOPT; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); memset(&bcl->stats, 0, sizeof(bcl->stats)); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return 0; } @@ -765,46 +816,59 @@ int tipc_bclink_set_queue_limits(u32 limit) if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) return -EINVAL; - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_link_set_queue_limits(bcl, limit); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); return 0; } -void tipc_bclink_init(void) +int tipc_bclink_init(void) { + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); + if (!bcbearer) + return -ENOMEM; + + bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); + if (!bclink) { + kfree(bcbearer); + return -ENOMEM; + } + + bcl = &bclink->link; bcbearer->bearer.media = &bcbearer->media; bcbearer->media.send_msg = tipc_bcbearer_send; sprintf(bcbearer->media.name, "tipc-broadcast"); + spin_lock_init(&bclink->lock); INIT_LIST_HEAD(&bcl->waiting_ports); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - bcl->b_ptr = &bcbearer->bearer; - bearer_list[BCBEARER] = &bcbearer->bearer; + bcl->bearer_id = MAX_BEARERS; + rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); + return 0; } void tipc_bclink_stop(void) { - spin_lock_bh(&bc_lock); + tipc_bclink_lock(); tipc_link_purge_queues(bcl); - spin_unlock_bh(&bc_lock); + tipc_bclink_unlock(); - bearer_list[BCBEARER] = NULL; - memset(bclink, 0, sizeof(*bclink)); - memset(bcbearer, 0, sizeof(*bcbearer)); + RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); + synchronize_net(); + kfree(bcbearer); + kfree(bclink); } - /** * tipc_nmap_add - add a node to a node map */ -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; @@ -819,7 +883,7 @@ void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) /** * tipc_nmap_remove - remove a node from a node map */ -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) { int n = tipc_node(node); int w = n / WSIZE; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a80ef54b818e..4875d9536aee 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -1,7 +1,7 @@ /* * net/tipc/bcast.h: Include file for TIPC broadcast code * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -39,6 +39,7 @@ #define MAX_NODES 4096 #define WSIZE 32 +#define TIPC_BCLINK_RESET 1 /** * struct tipc_node_map - set of node identifiers @@ -69,9 +70,6 @@ struct tipc_node; extern const char tipc_bclink_name[]; -void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); -void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); - /** * tipc_nmap_equal - test for equality of node maps */ @@ -84,13 +82,13 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); void tipc_port_list_free(struct tipc_port_list *pl_ptr); -void tipc_bclink_init(void); +int tipc_bclink_init(void); void tipc_bclink_stop(void); +void tipc_bclink_set_flags(unsigned int flags); void tipc_bclink_add_node(u32 addr); void tipc_bclink_remove_node(u32 addr); struct tipc_node *tipc_bclink_retransmit_to(void); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -int tipc_bclink_xmit(struct sk_buff *buf); void tipc_bclink_rcv(struct sk_buff *buf); u32 tipc_bclink_get_last_sent(void); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); @@ -98,6 +96,8 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(void); +void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); +uint tipc_bclink_get_mtu(void); +int tipc_bclink_xmit(struct sk_buff *buf); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3fef7eb776dc..264474394f9f 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -49,7 +49,7 @@ static struct tipc_media * const media_info_array[] = { NULL }; -struct tipc_bearer *bearer_list[MAX_BEARERS + 1]; +struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); @@ -178,7 +178,7 @@ struct tipc_bearer *tipc_bearer_find(const char *name) u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } @@ -198,10 +198,9 @@ struct sk_buff *tipc_bearer_get_names(void) if (!buf) return NULL; - read_lock_bh(&tipc_net_lock); for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b = bearer_list[j]; + b = rtnl_dereference(bearer_list[j]); if (!b) continue; if (b->media == media_info_array[i]) { @@ -211,22 +210,33 @@ struct sk_buff *tipc_bearer_get_names(void) } } } - read_unlock_bh(&tipc_net_lock); return buf; } -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_add_dest(u32 bearer_id, u32 dest) { - tipc_nmap_add(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_add_dest(b_ptr->link_req); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (b_ptr) { + tipc_bcbearer_sort(&b_ptr->nodes, dest, true); + tipc_disc_add_dest(b_ptr->link_req); + } + rcu_read_unlock(); } -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) +void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) { - tipc_nmap_remove(&b_ptr->nodes, dest); - tipc_bcbearer_sort(); - tipc_disc_remove_dest(b_ptr->link_req); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (b_ptr) { + tipc_bcbearer_sort(&b_ptr->nodes, dest, false); + tipc_disc_remove_dest(b_ptr->link_req); + } + rcu_read_unlock(); } /** @@ -271,13 +281,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) return -EINVAL; } - write_lock_bh(&tipc_net_lock); - m_ptr = tipc_media_find(b_names.media_name); if (!m_ptr) { pr_warn("Bearer <%s> rejected, media <%s> not registered\n", name, b_names.media_name); - goto exit; + return -EINVAL; } if (priority == TIPC_MEDIA_LINK_PRI) @@ -287,7 +295,7 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (!b_ptr) { bearer_id = i; continue; @@ -295,14 +303,14 @@ restart: if (!strcmp(name, b_ptr->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); - goto exit; + return -EINVAL; } if ((b_ptr->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", name); - goto exit; + return -EINVAL; } pr_warn("Bearer <%s> priority adjustment required %u->%u\n", name, priority + 1, priority); @@ -312,21 +320,20 @@ restart: if (bearer_id >= MAX_BEARERS) { pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n", name, MAX_BEARERS); - goto exit; + return -EINVAL; } b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); - if (!b_ptr) { - res = -ENOMEM; - goto exit; - } + if (!b_ptr) + return -ENOMEM; + strcpy(b_ptr->name, name); b_ptr->media = m_ptr; res = m_ptr->enable_media(b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); - goto exit; + return -EINVAL; } b_ptr->identity = bearer_id; @@ -341,16 +348,14 @@ restart: bearer_disable(b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); - goto exit; + return -EINVAL; } - bearer_list[bearer_id] = b_ptr; + rcu_assign_pointer(bearer_list[bearer_id], b_ptr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, disc_domain), priority); -exit: - write_unlock_bh(&tipc_net_lock); return res; } @@ -359,19 +364,16 @@ exit: */ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { - read_lock_bh(&tipc_net_lock); pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_disc_delete(b_ptr->link_req); tipc_link_reset_list(b_ptr->identity); - tipc_disc_create(b_ptr, &b_ptr->bcast_addr); - read_unlock_bh(&tipc_net_lock); + tipc_disc_reset(b_ptr); return 0; } /** * bearer_disable * - * Note: This routine assumes caller holds tipc_net_lock. + * Note: This routine assumes caller holds RTNL lock. */ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) { @@ -385,12 +387,12 @@ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) tipc_disc_delete(b_ptr->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == bearer_list[i]) { - bearer_list[i] = NULL; + if (b_ptr == rtnl_dereference(bearer_list[i])) { + RCU_INIT_POINTER(bearer_list[i], NULL); break; } } - kfree(b_ptr); + kfree_rcu(b_ptr, rcu); } int tipc_disable_bearer(const char *name) @@ -398,7 +400,6 @@ int tipc_disable_bearer(const char *name) struct tipc_bearer *b_ptr; int res; - write_lock_bh(&tipc_net_lock); b_ptr = tipc_bearer_find(name); if (b_ptr == NULL) { pr_warn("Attempt to disable unknown bearer <%s>\n", name); @@ -407,32 +408,9 @@ int tipc_disable_bearer(const char *name) bearer_disable(b_ptr, false); res = 0; } - write_unlock_bh(&tipc_net_lock); return res; } - -/* tipc_l2_media_addr_set - initialize Ethernet media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. - */ -void tipc_l2_media_addr_set(const struct tipc_bearer *b, - struct tipc_media_addr *a, char *mac) -{ - int len = b->media->hwaddr_len; - - if (unlikely(sizeof(a->value) < len)) { - WARN_ONCE(1, "Media length invalid\n"); - return; - } - - memcpy(a->value, mac, len); - memset(a->value + len, 0, sizeof(a->value) - len); - a->media_id = b->media->type_id; - a->broadcast = !memcmp(mac, b->bcast_addr.value, len); -} - int tipc_enable_l2_media(struct tipc_bearer *b) { struct net_device *dev; @@ -443,33 +421,37 @@ int tipc_enable_l2_media(struct tipc_bearer *b) if (!dev) return -ENODEV; - /* Associate TIPC bearer with Ethernet bearer */ - b->media_ptr = dev; - memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value)); + /* Associate TIPC bearer with L2 bearer */ + rcu_assign_pointer(b->media_ptr, dev); + memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = 1; b->mtu = dev->mtu; - tipc_l2_media_addr_set(b, &b->addr, (char *)dev->dev_addr); + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } -/* tipc_disable_l2_media - detach TIPC bearer from an Ethernet interface +/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface * - * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, + * Mark L2 bearer as inactive so that incoming buffers are thrown away, * then get worker thread to complete bearer cleanup. (Can't do cleanup * here because cleanup code needs to sleep and caller holds spinlocks.) */ void tipc_disable_l2_media(struct tipc_bearer *b) { - struct net_device *dev = (struct net_device *)b->media_ptr; + struct net_device *dev; + + dev = (struct net_device *)rtnl_dereference(b->media_ptr); + RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); dev_put(dev); } /** - * tipc_l2_send_msg - send a TIPC packet out over an Ethernet interface + * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @buf: the packet to be sent * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address @@ -478,8 +460,12 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct sk_buff *clone; + struct net_device *dev; int delta; - struct net_device *dev = (struct net_device *)b->media_ptr; + + dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); + if (!dev) + return 0; clone = skb_clone(buf, GFP_ATOMIC); if (!clone) @@ -507,10 +493,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, * The media send routine must not alter the buffer being passed in * as it may be needed for later retransmission! */ -void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, +void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest) { - b->media->send_msg(buf, b, dest); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + if (likely(b_ptr)) + b_ptr->media->send_msg(buf, b_ptr, dest); + rcu_read_unlock(); } /** @@ -535,7 +527,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, } rcu_read_lock(); - b_ptr = rcu_dereference(dev->tipc_ptr); + b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; @@ -568,12 +560,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - rcu_read_lock(); - b_ptr = rcu_dereference(dev->tipc_ptr); - if (!b_ptr) { - rcu_read_unlock(); + b_ptr = rtnl_dereference(dev->tipc_ptr); + if (!b_ptr) return NOTIFY_DONE; - } b_ptr->mtu = dev->mtu; @@ -586,17 +575,15 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, tipc_reset_bearer(b_ptr); break; case NETDEV_CHANGEADDR: - tipc_l2_media_addr_set(b_ptr, &b_ptr->addr, + b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, (char *)dev->dev_addr); tipc_reset_bearer(b_ptr); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - tipc_disable_bearer(b_ptr->name); + bearer_disable(b_ptr, false); break; } - rcu_read_unlock(); - return NOTIFY_OK; } @@ -633,7 +620,7 @@ void tipc_bearer_stop(void) u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = bearer_list[i]; + b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr) { bearer_disable(b_ptr, true); bearer_list[i] = NULL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index ba48145e871d..78fccc49de23 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -42,14 +42,12 @@ #define MAX_BEARERS 2 #define MAX_MEDIA 2 -/* - * Identifiers associated with TIPC message header media address info - * - * - address info field is 20 bytes long - * - media type identifier located at offset 3 - * - remaining bytes vary according to media type +/* Identifiers associated with TIPC message header media address info + * - address info field is 32 bytes long + * - the field's actual content and length is defined per media + * - remaining unused bytes in the field are set to zero */ -#define TIPC_MEDIA_ADDR_SIZE 20 +#define TIPC_MEDIA_ADDR_SIZE 32 #define TIPC_MEDIA_TYPE_OFFSET 3 /* @@ -77,9 +75,10 @@ struct tipc_bearer; * @send_msg: routine which handles buffer transmission * @enable_media: routine which enables a media * @disable_media: routine which disables a media - * @addr2str: routine which converts media address to string - * @addr2msg: routine which converts media address to protocol message area - * @msg2addr: routine which converts media address from protocol message area + * @addr2str: convert media address format to string + * @addr2msg: convert from media addr format to discovery msg addr format + * @msg2addr: convert from discovery msg addr format to media addr format + * @raw2addr: convert from raw addr format to media addr format * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion @@ -93,10 +92,16 @@ struct tipc_media { struct tipc_media_addr *dest); int (*enable_media)(struct tipc_bearer *b_ptr); void (*disable_media)(struct tipc_bearer *b_ptr); - int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); - int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); - int (*msg2addr)(const struct tipc_bearer *b_ptr, - struct tipc_media_addr *a, char *msg_area); + int (*addr2str)(struct tipc_media_addr *addr, + char *strbuf, + int bufsz); + int (*addr2msg)(char *msg, struct tipc_media_addr *addr); + int (*msg2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg); + int (*raw2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *raw); u32 priority; u32 tolerance; u32 window; @@ -113,6 +118,7 @@ struct tipc_media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @bcast_addr: media address used in broadcasting + * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer * @window: default window size for bearer * @tolerance: default link tolerance for bearer @@ -127,12 +133,13 @@ struct tipc_media { * care of initializing all other fields. */ struct tipc_bearer { - void *media_ptr; /* initalized by media */ + void __rcu *media_ptr; /* initalized by media */ u32 mtu; /* initalized by media */ struct tipc_media_addr addr; /* initalized by media */ char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct rcu_head rcu; u32 priority; u32 window; u32 tolerance; @@ -150,7 +157,7 @@ struct tipc_bearer_names { struct tipc_link; -extern struct tipc_bearer *bearer_list[]; +extern struct tipc_bearer __rcu *bearer_list[]; /* * TIPC routines available to supported media types @@ -173,22 +180,20 @@ int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); -void tipc_l2_media_addr_set(const struct tipc_bearer *b, - struct tipc_media_addr *a, char *mac); int tipc_enable_l2_media(struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); -void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); +void tipc_bearer_add_dest(u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(void); -void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, +void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c index 4b981c053823..2b42403ad33a 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -42,8 +42,6 @@ #define REPLY_TRUNCATED "<truncated>\n" -static DEFINE_MUTEX(config_mutex); - static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ static int rep_headroom; /* reply message headroom to use */ @@ -179,8 +177,10 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - tipc_net_start(addr); - return tipc_cfg_reply_none(); + if (!tipc_net_start(addr)) + return tipc_cfg_reply_none(); + + return tipc_cfg_reply_error_string("cannot change to network mode"); } static struct sk_buff *cfg_set_max_ports(void) @@ -223,7 +223,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area { struct sk_buff *rep_tlv_buf; - mutex_lock(&config_mutex); + rtnl_lock(); /* Save request and reply details in a well-known location */ req_tlv_area = request_area; @@ -337,6 +337,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Return reply buffer */ exit: - mutex_unlock(&config_mutex); + rtnl_unlock(); return rep_tlv_buf; } diff --git a/net/tipc/core.c b/net/tipc/core.c index 50d57429ebca..676d18015dd8 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -80,7 +80,6 @@ struct sk_buff *tipc_buf_acquire(u32 size) */ static void tipc_core_stop(void) { - tipc_handler_stop(); tipc_net_stop(); tipc_bearer_cleanup(); tipc_netlink_stop(); @@ -100,10 +99,6 @@ static int tipc_core_start(void) get_random_bytes(&tipc_random, sizeof(tipc_random)); - err = tipc_handler_start(); - if (err) - goto out_handler; - err = tipc_ref_table_init(tipc_max_ports, tipc_random); if (err) goto out_reftbl; @@ -146,8 +141,6 @@ out_netlink: out_nametbl: tipc_ref_table_stop(); out_reftbl: - tipc_handler_stop(); -out_handler: return err; } @@ -161,10 +154,11 @@ static int __init tipc_init(void) tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; - sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 << + sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; res = tipc_core_start(); if (res) diff --git a/net/tipc/core.h b/net/tipc/core.h index 8985bbcb942b..bb26ed1ee966 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -56,7 +56,8 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/vmalloc.h> - +#include <linux/rtnetlink.h> +#include <linux/etherdevice.h> #define TIPC_MOD_VER "2.0.0" @@ -89,8 +90,6 @@ extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems */ -int tipc_handler_start(void); -void tipc_handler_stop(void); int tipc_netlink_start(void); void tipc_netlink_stop(void); int tipc_socket_init(void); @@ -109,12 +108,10 @@ void tipc_unregister_sysctl(void); #endif /* - * TIPC timer and signal code + * TIPC timer code */ typedef void (*Handler) (unsigned long); -u32 tipc_k_signal(Handler routine, unsigned long argument); - /** * k_init_timer - initialize a timer * @timer: pointer to timer structure @@ -191,6 +188,7 @@ static inline void k_term_timer(struct timer_list *timer) struct tipc_skb_cb { void *handle; bool deferred; + struct sk_buff *tail; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 542fe3413dc4..aa722a42ef8b 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -1,7 +1,7 @@ /* * net/tipc/discover.c * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2014, Ericsson AB * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. * @@ -46,8 +46,9 @@ /** * struct tipc_link_req - information about an ongoing link setup request - * @bearer: bearer issuing requests + * @bearer_id: identity of bearer issuing requests * @dest: destination address for request messages + * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) * @lock: spinlock for controlling access to requests * @buf: request message to be (repeatedly) sent @@ -55,8 +56,9 @@ * @timer_intv: current interval between requests (in ms) */ struct tipc_link_req { - struct tipc_bearer *bearer; + u32 bearer_id; struct tipc_media_addr dest; + u32 domain; int num_nodes; spinlock_t lock; struct sk_buff *buf; @@ -69,22 +71,19 @@ struct tipc_link_req { * @type: message type (request or response) * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, struct tipc_bearer *b_ptr) +static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, + struct tipc_bearer *b_ptr) { - struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); struct tipc_msg *msg; u32 dest_domain = b_ptr->domain; - if (buf) { - msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); - msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); - msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); - b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); - } - return buf; + msg = buf_msg(buf); + tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + msg_set_non_seq(msg, 1); + msg_set_node_sig(msg, tipc_random); + msg_set_dest_domain(msg, dest_domain); + msg_set_bc_netid(msg, tipc_net_id); + b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); } /** @@ -107,146 +106,150 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, } /** - * tipc_disc_rcv - handle incoming link setup message (request or response) + * tipc_disc_rcv - handle incoming discovery message (request or response) * @buf: buffer containing message - * @b_ptr: bearer that message arrived on + * @bearer: bearer that message arrived on */ -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr) +void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) { - struct tipc_node *n_ptr; + struct tipc_node *node; struct tipc_link *link; - struct tipc_media_addr media_addr; + struct tipc_media_addr maddr; struct sk_buff *rbuf; struct tipc_msg *msg = buf_msg(buf); - u32 dest = msg_dest_domain(msg); - u32 orig = msg_prevnode(msg); + u32 ddom = msg_dest_domain(msg); + u32 onode = msg_prevnode(msg); u32 net_id = msg_bc_netid(msg); - u32 type = msg_type(msg); + u32 mtyp = msg_type(msg); u32 signature = msg_node_sig(msg); - int addr_mismatch; - int link_fully_up; - - media_addr.broadcast = 1; - b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg)); + bool addr_match = false; + bool sign_match = false; + bool link_up = false; + bool accept_addr = false; + bool accept_sign = false; + bool respond = false; + + bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg)); kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ if (net_id != tipc_net_id) return; - if (media_addr.broadcast) + if (maddr.broadcast) return; - if (!tipc_addr_domain_valid(dest)) + if (!tipc_addr_domain_valid(ddom)) return; - if (!tipc_addr_node_valid(orig)) + if (!tipc_addr_node_valid(onode)) return; - if (orig == tipc_own_addr) { - if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr))) - disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); + + if (in_own_node(onode)) { + if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) + disc_dupl_alert(bearer, tipc_own_addr, &maddr); return; } - if (!tipc_in_scope(dest, tipc_own_addr)) + if (!tipc_in_scope(ddom, tipc_own_addr)) return; - if (!tipc_in_scope(b_ptr->domain, orig)) + if (!tipc_in_scope(bearer->domain, onode)) return; - /* Locate structure corresponding to requesting node */ - n_ptr = tipc_node_find(orig); - if (!n_ptr) { - n_ptr = tipc_node_create(orig); - if (!n_ptr) - return; - } - tipc_node_lock(n_ptr); + /* Locate, or if necessary, create, node: */ + node = tipc_node_find(onode); + if (!node) + node = tipc_node_create(onode); + if (!node) + return; - /* Prepare to validate requesting node's signature and media address */ - link = n_ptr->links[b_ptr->identity]; - addr_mismatch = (link != NULL) && - memcmp(&link->media_addr, &media_addr, sizeof(media_addr)); + tipc_node_lock(node); + link = node->links[bearer->identity]; - /* - * Ensure discovery message's signature is correct - * - * If signature is incorrect and there is no working link to the node, - * accept the new signature but invalidate all existing links to the - * node so they won't re-activate without a new discovery message. - * - * If signature is incorrect and the requested link to the node is - * working, accept the new signature. (This is an instance of delayed - * rediscovery, where a link endpoint was able to re-establish contact - * with its peer endpoint on a node that rebooted before receiving a - * discovery message from that node.) - * - * If signature is incorrect and there is a working link to the node - * that is not the requested link, reject the request (must be from - * a duplicate node). - */ - if (signature != n_ptr->signature) { - if (n_ptr->working_links == 0) { - struct tipc_link *curr_link; - int i; - - for (i = 0; i < MAX_BEARERS; i++) { - curr_link = n_ptr->links[i]; - if (curr_link) { - memset(&curr_link->media_addr, 0, - sizeof(media_addr)); - tipc_link_reset(curr_link); - } - } - addr_mismatch = (link != NULL); - } else if (tipc_link_is_up(link) && !addr_mismatch) { - /* delayed rediscovery */ - } else { - disc_dupl_alert(b_ptr, orig, &media_addr); - tipc_node_unlock(n_ptr); - return; - } - n_ptr->signature = signature; + /* Prepare to validate requesting node's signature and media address */ + sign_match = (signature == node->signature); + addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr)); + link_up = link && tipc_link_is_up(link); + + + /* These three flags give us eight permutations: */ + + if (sign_match && addr_match && link_up) { + /* All is fine. Do nothing. */ + } else if (sign_match && addr_match && !link_up) { + /* Respond. The link will come up in due time */ + respond = true; + } else if (sign_match && !addr_match && link_up) { + /* Peer has changed i/f address without rebooting. + * If so, the link will reset soon, and the next + * discovery will be accepted. So we can ignore it. + * It may also be an cloned or malicious peer having + * chosen the same node address and signature as an + * existing one. + * Ignore requests until the link goes down, if ever. + */ + disc_dupl_alert(bearer, onode, &maddr); + } else if (sign_match && !addr_match && !link_up) { + /* Peer link has changed i/f address without rebooting. + * It may also be a cloned or malicious peer; we can't + * distinguish between the two. + * The signature is correct, so we must accept. + */ + accept_addr = true; + respond = true; + } else if (!sign_match && addr_match && link_up) { + /* Peer node rebooted. Two possibilities: + * - Delayed re-discovery; this link endpoint has already + * reset and re-established contact with the peer, before + * receiving a discovery message from that node. + * (The peer happened to receive one from this node first). + * - The peer came back so fast that our side has not + * discovered it yet. Probing from this side will soon + * reset the link, since there can be no working link + * endpoint at the peer end, and the link will re-establish. + * Accept the signature, since it comes from a known peer. + */ + accept_sign = true; + } else if (!sign_match && addr_match && !link_up) { + /* The peer node has rebooted. + * Accept signature, since it is a known peer. + */ + accept_sign = true; + respond = true; + } else if (!sign_match && !addr_match && link_up) { + /* Peer rebooted with new address, or a new/duplicate peer. + * Ignore until the link goes down, if ever. + */ + disc_dupl_alert(bearer, onode, &maddr); + } else if (!sign_match && !addr_match && !link_up) { + /* Peer rebooted with new address, or it is a new peer. + * Accept signature and address. + */ + accept_sign = true; + accept_addr = true; + respond = true; } - /* - * Ensure requesting node's media address is correct - * - * If media address doesn't match and the link is working, reject the - * request (must be from a duplicate node). - * - * If media address doesn't match and the link is not working, accept - * the new media address and reset the link to ensure it starts up - * cleanly. - */ - if (addr_mismatch) { - if (tipc_link_is_up(link)) { - disc_dupl_alert(b_ptr, orig, &media_addr); - tipc_node_unlock(n_ptr); - return; - } else { - memcpy(&link->media_addr, &media_addr, - sizeof(media_addr)); - tipc_link_reset(link); - } - } + if (accept_sign) + node->signature = signature; - /* Create a link endpoint for this bearer, if necessary */ - if (!link) { - link = tipc_link_create(n_ptr, b_ptr, &media_addr); - if (!link) { - tipc_node_unlock(n_ptr); - return; + if (accept_addr) { + if (!link) + link = tipc_link_create(node, bearer, &maddr); + if (link) { + memcpy(&link->media_addr, &maddr, sizeof(maddr)); + tipc_link_reset(link); + } else { + respond = false; } } - /* Accept discovery message & send response, if necessary */ - link_fully_up = link_working_working(link); - - if ((type == DSC_REQ_MSG) && !link_fully_up) { - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, b_ptr); + /* Send response, if necessary */ + if (respond && (mtyp == DSC_REQ_MSG)) { + rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { - tipc_bearer_send(b_ptr, rbuf, &media_addr); + tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer); + tipc_bearer_send(bearer->identity, rbuf, &maddr); kfree_skb(rbuf); } } - - tipc_node_unlock(n_ptr); + tipc_node_unlock(node); } /** @@ -303,7 +306,7 @@ static void disc_timeout(struct tipc_link_req *req) spin_lock_bh(&req->lock); /* Stop searching if only desired node has been found */ - if (tipc_node(req->bearer->domain) && req->num_nodes) { + if (tipc_node(req->domain) && req->num_nodes) { req->timer_intv = TIPC_LINK_REQ_INACTIVE; goto exit; } @@ -315,7 +318,7 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->bearer, req->buf, &req->dest); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); req->timer_intv *= 2; @@ -347,21 +350,23 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) if (!req) return -ENOMEM; - req->buf = tipc_disc_init_msg(DSC_REQ_MSG, b_ptr); + req->buf = tipc_buf_acquire(INT_H_SIZE); if (!req->buf) { kfree(req); - return -ENOMSG; + return -ENOMEM; } + tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); - req->bearer = b_ptr; + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); k_start_timer(&req->timer, req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(req->bearer, req->buf, &req->dest); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); return 0; } @@ -376,3 +381,23 @@ void tipc_disc_delete(struct tipc_link_req *req) kfree_skb(req->buf); kfree(req); } + +/** + * tipc_disc_reset - reset object to send periodic link setup requests + * @b_ptr: ptr to bearer issuing requests + * @dest_domain: network domain to which links can be established + */ +void tipc_disc_reset(struct tipc_bearer *b_ptr) +{ + struct tipc_link_req *req = b_ptr->link_req; + + spin_lock_bh(&req->lock); + tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; + req->num_nodes = 0; + req->timer_intv = TIPC_LINK_REQ_INIT; + k_start_timer(&req->timer, req->timer_intv); + tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + spin_unlock_bh(&req->lock); +} diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 07f34729459d..515b57392f4d 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -41,6 +41,7 @@ struct tipc_link_req; int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); +void tipc_disc_reset(struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 67cf3f935dba..5e1426f1751f 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -1,7 +1,7 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2007, 2013, Ericsson AB + * Copyright (c) 2001-2007, 2013-2014, Ericsson AB * Copyright (c) 2005-2008, 2011-2013, Wind River Systems * All rights reserved. * @@ -37,39 +37,52 @@ #include "core.h" #include "bearer.h" -#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */ +#define ETH_ADDR_OFFSET 4 /* MAC addr position inside address field */ -/* convert Ethernet address to string */ -static int tipc_eth_addr2str(struct tipc_media_addr *a, char *str_buf, - int str_size) +/* Convert Ethernet address (media address format) to string */ +static int tipc_eth_addr2str(struct tipc_media_addr *addr, + char *strbuf, int bufsz) { - if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ + if (bufsz < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ return 1; - sprintf(str_buf, "%pM", a->value); + sprintf(strbuf, "%pM", addr->value); return 0; } -/* convert Ethernet address format to message header format */ -static int tipc_eth_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* Convert from media address format to discovery message addr format */ +static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); - msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; - memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN); + memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; + memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; } -/* convert message header address format to Ethernet format */ -static int tipc_eth_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* Convert raw mac address format to media addr format */ +static int tipc_eth_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) { - if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) - return 1; + char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - tipc_l2_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); + memset(addr, 0, sizeof(*addr)); + ether_addr_copy(addr->value, msg); + addr->media_id = TIPC_MEDIA_TYPE_ETH; + addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN); return 0; } +/* Convert discovery msg addr format to Ethernet media addr format */ +static int tipc_eth_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + /* Skip past preamble: */ + msg += ETH_ADDR_OFFSET; + return tipc_eth_raw2addr(b, addr, msg); +} + /* Ethernet media registration info */ struct tipc_media eth_media_info = { .send_msg = tipc_l2_send_msg, @@ -78,6 +91,7 @@ struct tipc_media eth_media_info = { .addr2str = tipc_eth_addr2str, .addr2msg = tipc_eth_addr2msg, .msg2addr = tipc_eth_msg2addr, + .raw2addr = tipc_eth_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, @@ -85,4 +99,3 @@ struct tipc_media eth_media_info = { .hwaddr_len = ETH_ALEN, .name = "eth" }; - diff --git a/net/tipc/handler.c b/net/tipc/handler.c deleted file mode 100644 index 1fabf160501f..000000000000 --- a/net/tipc/handler.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * net/tipc/handler.c: TIPC signal handling - * - * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" - -struct queue_item { - struct list_head next_signal; - void (*handler) (unsigned long); - unsigned long data; -}; - -static struct kmem_cache *tipc_queue_item_cache; -static struct list_head signal_queue_head; -static DEFINE_SPINLOCK(qitem_lock); -static int handler_enabled __read_mostly; - -static void process_signal_queue(unsigned long dummy); - -static DECLARE_TASKLET_DISABLED(tipc_tasklet, process_signal_queue, 0); - - -unsigned int tipc_k_signal(Handler routine, unsigned long argument) -{ - struct queue_item *item; - - spin_lock_bh(&qitem_lock); - if (!handler_enabled) { - spin_unlock_bh(&qitem_lock); - return -ENOPROTOOPT; - } - - item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC); - if (!item) { - pr_err("Signal queue out of memory\n"); - spin_unlock_bh(&qitem_lock); - return -ENOMEM; - } - item->handler = routine; - item->data = argument; - list_add_tail(&item->next_signal, &signal_queue_head); - spin_unlock_bh(&qitem_lock); - tasklet_schedule(&tipc_tasklet); - return 0; -} - -static void process_signal_queue(unsigned long dummy) -{ - struct queue_item *__volatile__ item; - struct list_head *l, *n; - - spin_lock_bh(&qitem_lock); - list_for_each_safe(l, n, &signal_queue_head) { - item = list_entry(l, struct queue_item, next_signal); - list_del(&item->next_signal); - spin_unlock_bh(&qitem_lock); - item->handler(item->data); - spin_lock_bh(&qitem_lock); - kmem_cache_free(tipc_queue_item_cache, item); - } - spin_unlock_bh(&qitem_lock); -} - -int tipc_handler_start(void) -{ - tipc_queue_item_cache = - kmem_cache_create("tipc_queue_items", sizeof(struct queue_item), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!tipc_queue_item_cache) - return -ENOMEM; - - INIT_LIST_HEAD(&signal_queue_head); - tasklet_enable(&tipc_tasklet); - handler_enabled = 1; - return 0; -} - -void tipc_handler_stop(void) -{ - struct list_head *l, *n; - struct queue_item *item; - - spin_lock_bh(&qitem_lock); - if (!handler_enabled) { - spin_unlock_bh(&qitem_lock); - return; - } - handler_enabled = 0; - spin_unlock_bh(&qitem_lock); - - tasklet_kill(&tipc_tasklet); - - spin_lock_bh(&qitem_lock); - list_for_each_safe(l, n, &signal_queue_head) { - item = list_entry(l, struct queue_item, next_signal); - list_del(&item->next_signal); - kmem_cache_free(tipc_queue_item_cache, item); - } - spin_unlock_bh(&qitem_lock); - - kmem_cache_destroy(tipc_queue_item_cache); -} diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 844a77e25828..8522eef9c136 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,7 +42,7 @@ #include "core.h" #include "bearer.h" -/* convert InfiniBand address to string */ +/* convert InfiniBand address (media address format) media address to string */ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) { @@ -54,23 +54,35 @@ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, return 0; } -/* convert InfiniBand address format to message header format */ -static int tipc_ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* Convert from media address format to discovery message addr format */ +static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); - msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; - memcpy(msg_area, a->value, INFINIBAND_ALEN); + memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memcpy(msg, addr->value, INFINIBAND_ALEN); return 0; } -/* convert message header address format to InfiniBand format */ -static int tipc_ib_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* Convert raw InfiniBand address format to media addr format */ +static int tipc_ib_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) { - tipc_l2_media_addr_set(tb_ptr, a, msg_area); + memset(addr, 0, sizeof(*addr)); + memcpy(addr->value, msg, INFINIBAND_ALEN); + addr->media_id = TIPC_MEDIA_TYPE_IB; + addr->broadcast = !memcmp(msg, b->bcast_addr.value, + INFINIBAND_ALEN); return 0; } +/* Convert discovery msg addr format to InfiniBand media addr format */ +static int tipc_ib_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + return tipc_ib_raw2addr(b, addr, msg); +} + /* InfiniBand media registration info */ struct tipc_media ib_media_info = { .send_msg = tipc_l2_send_msg, @@ -79,6 +91,7 @@ struct tipc_media ib_media_info = { .addr2str = tipc_ib_addr2str, .addr2msg = tipc_ib_addr2msg, .msg2addr = tipc_ib_msg2addr, + .raw2addr = tipc_ib_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, @@ -86,4 +99,3 @@ struct tipc_media ib_media_info = { .hwaddr_len = INFINIBAND_ALEN, .name = "ib" }; - diff --git a/net/tipc/link.c b/net/tipc/link.c index c5190ab75290..fb1485dc6736 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -37,6 +37,7 @@ #include "core.h" #include "link.h" #include "port.h" +#include "socket.h" #include "name_distr.h" #include "discover.h" #include "config.h" @@ -81,15 +82,13 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); -static int tipc_link_iovec_long_xmit(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destnode); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); +static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); +static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf); /* * Simple link routines @@ -101,9 +100,18 @@ static unsigned int align(unsigned int i) static void link_init_max_pkt(struct tipc_link *l_ptr) { + struct tipc_bearer *b_ptr; u32 max_pkt; - max_pkt = (l_ptr->b_ptr->mtu & ~3); + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + if (!b_ptr) { + rcu_read_unlock(); + return; + } + max_pkt = (b_ptr->mtu & ~3); + rcu_read_unlock(); + if (max_pkt > MAX_MSG_SIZE) max_pkt = MAX_MSG_SIZE; @@ -248,7 +256,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; l_ptr->peer_session = INVALID_SESSION; - l_ptr->b_ptr = b_ptr; + l_ptr->bearer_id = b_ptr->identity; link_set_supervision_props(l_ptr, b_ptr->tolerance); l_ptr->state = RESET_UNKNOWN; @@ -263,6 +271,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); + l_ptr->net_plane = b_ptr->net_plane; link_init_max_pkt(l_ptr); l_ptr->next_out_no = 1; @@ -287,14 +296,14 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); + tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) { tipc_link_reset(l_ptr); if (shutting_down || !tipc_node_is_up(n_ptr)) { tipc_node_detach_link(l_ptr->owner, l_ptr); tipc_link_reset_fragments(l_ptr); - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); /* Nobody else can access this link now: */ del_timer_sync(&l_ptr->timer); @@ -302,12 +311,12 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) } else { /* Detach/delete when failover is finished: */ l_ptr->flags |= LINK_STOPPED; - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); del_timer_sync(&l_ptr->timer); } continue; } - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); } rcu_read_unlock(); } @@ -324,13 +333,15 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz) { struct tipc_port *p_ptr; + struct tipc_sock *tsk; spin_lock_bh(&tipc_port_list_lock); p_ptr = tipc_port_lock(origport); if (p_ptr) { if (!list_empty(&p_ptr->wait_list)) goto exit; - p_ptr->congested = 1; + tsk = tipc_port_to_sock(p_ptr); + tsk->link_cong = 1; p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); l_ptr->stats.link_congs++; @@ -344,6 +355,7 @@ exit: void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) { struct tipc_port *p_ptr; + struct tipc_sock *tsk; struct tipc_port *temp_p_ptr; int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; @@ -359,10 +371,11 @@ void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) wait_list) { if (win <= 0) break; + tsk = tipc_port_to_sock(p_ptr); list_del_init(&p_ptr->wait_list); spin_lock_bh(p_ptr->lock); - p_ptr->congested = 0; - tipc_port_wakeup(p_ptr); + tsk->link_cong = 0; + tipc_sock_wakeup(tsk); win -= p_ptr->waiting_pkts; spin_unlock_bh(p_ptr->lock); } @@ -388,9 +401,8 @@ static void link_release_outqueue(struct tipc_link *l_ptr) */ void tipc_link_reset_fragments(struct tipc_link *l_ptr) { - kfree_skb(l_ptr->reasm_head); - l_ptr->reasm_head = NULL; - l_ptr->reasm_tail = NULL; + kfree_skb(l_ptr->reasm_buf); + l_ptr->reasm_buf = NULL; } /** @@ -426,7 +438,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) return; tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); + tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; @@ -464,11 +476,11 @@ void tipc_link_reset_list(unsigned int bearer_id) rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); + tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) tipc_link_reset(l_ptr); - spin_unlock_bh(&n_ptr->lock); + tipc_node_unlock(n_ptr); } rcu_read_unlock(); } @@ -477,7 +489,7 @@ static void link_activate(struct tipc_link *l_ptr) { l_ptr->next_in_no = l_ptr->stats.recv_info = 1; tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); + tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr); } /** @@ -666,180 +678,142 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } } -/* - * link_bundle_buf(): Append contents of a buffer to - * the tail of an existing one. +/* tipc_link_cong: determine return value and how to treat the + * sent buffer during link congestion. + * - For plain, errorless user data messages we keep the buffer and + * return -ELINKONG. + * - For all other messages we discard the buffer and return -EHOSTUNREACH + * - For TIPC internal messages we also reset the link */ -static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler, - struct sk_buff *buf) +static int tipc_link_cong(struct tipc_link *link, struct sk_buff *buf) { - struct tipc_msg *bundler_msg = buf_msg(bundler); struct tipc_msg *msg = buf_msg(buf); - u32 size = msg_size(msg); - u32 bundle_size = msg_size(bundler_msg); - u32 to_pos = align(bundle_size); - u32 pad = to_pos - bundle_size; - - if (msg_user(bundler_msg) != MSG_BUNDLER) - return 0; - if (msg_type(bundler_msg) != OPEN_MSG) - return 0; - if (skb_tailroom(bundler) < (pad + size)) - return 0; - if (l_ptr->max_pkt < (to_pos + size)) - return 0; - - skb_put(bundler, pad + size); - skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size); - msg_set_size(bundler_msg, to_pos + size); - msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1); - kfree_skb(buf); - l_ptr->stats.sent_bundled++; - return 1; -} - -static void link_add_to_outqueue(struct tipc_link *l_ptr, - struct sk_buff *buf, - struct tipc_msg *msg) -{ - u32 ack = mod(l_ptr->next_in_no - 1); - u32 seqno = mod(l_ptr->next_out_no++); - - msg_set_word(msg, 2, ((ack << 16) | seqno)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - buf->next = NULL; - if (l_ptr->first_out) { - l_ptr->last_out->next = buf; - l_ptr->last_out = buf; - } else - l_ptr->first_out = l_ptr->last_out = buf; - - l_ptr->out_queue_size++; - if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz) - l_ptr->stats.max_queue_sz = l_ptr->out_queue_size; -} - -static void link_add_chain_to_outqueue(struct tipc_link *l_ptr, - struct sk_buff *buf_chain, - u32 long_msgno) -{ - struct sk_buff *buf; - struct tipc_msg *msg; + uint psz = msg_size(msg); + uint imp = tipc_msg_tot_importance(msg); + u32 oport = msg_tot_origport(msg); - if (!l_ptr->next_out) - l_ptr->next_out = buf_chain; - while (buf_chain) { - buf = buf_chain; - buf_chain = buf_chain->next; - - msg = buf_msg(buf); - msg_set_long_msgno(msg, long_msgno); - link_add_to_outqueue(l_ptr, buf, msg); + if (likely(imp <= TIPC_CRITICAL_IMPORTANCE)) { + if (!msg_errcode(msg) && !msg_reroute_cnt(msg)) { + link_schedule_port(link, oport, psz); + return -ELINKCONG; + } + } else { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); } + kfree_skb_list(buf); + return -EHOSTUNREACH; } -/* - * tipc_link_xmit() is the 'full path' for messages, called from - * inside TIPC when the 'fast path' in tipc_send_xmit - * has failed, and from link_send() +/** + * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked + * @link: link to use + * @buf: chain of buffers containing message + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket + * user data messages) or -EHOSTUNREACH (all other messages/senders) + * Only the socket functions tipc_send_stream() and tipc_send_packet() need + * to act on the return value, since they may need to do more send attempts. */ -int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) +int __tipc_link_xmit(struct tipc_link *link, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - u32 size = msg_size(msg); - u32 dsz = msg_data_sz(msg); - u32 queue_size = l_ptr->out_queue_size; - u32 imp = tipc_msg_tot_importance(msg); - u32 queue_limit = l_ptr->queue_limit[imp]; - u32 max_packet = l_ptr->max_pkt; - - /* Match msg importance against queue limits: */ - if (unlikely(queue_size >= queue_limit)) { - if (imp <= TIPC_CRITICAL_IMPORTANCE) { - link_schedule_port(l_ptr, msg_origport(msg), size); - kfree_skb(buf); - return -ELINKCONG; - } - kfree_skb(buf); - if (imp > CONN_MANAGER) { - pr_warn("%s<%s>, send queue full", link_rst_msg, - l_ptr->name); - tipc_link_reset(l_ptr); + uint psz = msg_size(msg); + uint qsz = link->out_queue_size; + uint sndlim = link->queue_limit[0]; + uint imp = tipc_msg_tot_importance(msg); + uint mtu = link->max_pkt; + uint ack = mod(link->next_in_no - 1); + uint seqno = link->next_out_no; + uint bc_last_in = link->owner->bclink.last_in; + struct tipc_media_addr *addr = &link->media_addr; + struct sk_buff *next = buf->next; + + /* Match queue limits against msg importance: */ + if (unlikely(qsz >= link->queue_limit[imp])) + return tipc_link_cong(link, buf); + + /* Has valid packet limit been used ? */ + if (unlikely(psz > mtu)) { + kfree_skb_list(buf); + return -EMSGSIZE; + } + + /* Prepare each packet for sending, and add to outqueue: */ + while (buf) { + next = buf->next; + msg = buf_msg(buf); + msg_set_word(msg, 2, ((ack << 16) | mod(seqno))); + msg_set_bcast_ack(msg, bc_last_in); + + if (!link->first_out) { + link->first_out = buf; + } else if (qsz < sndlim) { + link->last_out->next = buf; + } else if (tipc_msg_bundle(link->last_out, buf, mtu)) { + link->stats.sent_bundled++; + buf = next; + next = buf->next; + continue; + } else if (tipc_msg_make_bundle(&buf, mtu, link->addr)) { + link->stats.sent_bundled++; + link->stats.sent_bundles++; + link->last_out->next = buf; + if (!link->next_out) + link->next_out = buf; + } else { + link->last_out->next = buf; + if (!link->next_out) + link->next_out = buf; } - return dsz; - } - /* Fragmentation needed ? */ - if (size > max_packet) - return tipc_link_frag_xmit(l_ptr, buf); - - /* Packet can be queued or sent. */ - if (likely(!link_congested(l_ptr))) { - link_add_to_outqueue(l_ptr, buf, msg); - - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return dsz; - } - /* Congestion: can message be bundled ? */ - if ((msg_user(msg) != CHANGEOVER_PROTOCOL) && - (msg_user(msg) != MSG_FRAGMENTER)) { - - /* Try adding message to an existing bundle */ - if (l_ptr->next_out && - link_bundle_buf(l_ptr, l_ptr->last_out, buf)) - return dsz; - - /* Try creating a new bundle */ - if (size <= max_packet * 2 / 3) { - struct sk_buff *bundler = tipc_buf_acquire(max_packet); - struct tipc_msg bundler_hdr; - - if (bundler) { - tipc_msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, - INT_H_SIZE, l_ptr->addr); - skb_copy_to_linear_data(bundler, &bundler_hdr, - INT_H_SIZE); - skb_trim(bundler, INT_H_SIZE); - link_bundle_buf(l_ptr, bundler, buf); - buf = bundler; - msg = buf_msg(buf); - l_ptr->stats.sent_bundles++; - } + /* Send packet if possible: */ + if (likely(++qsz <= sndlim)) { + tipc_bearer_send(link->bearer_id, buf, addr); + link->next_out = next; + link->unacked_window = 0; } + seqno++; + link->last_out = buf; + buf = next; } - if (!l_ptr->next_out) - l_ptr->next_out = buf; - link_add_to_outqueue(l_ptr, buf, msg); - return dsz; + link->next_out_no = seqno; + link->out_queue_size = qsz; + return 0; } -/* - * tipc_link_xmit(): same as __tipc_link_xmit(), but the link to use - * has not been selected yet, and the the owner node is not locked - * Called by TIPC internal users, e.g. the name distributor +/** + * tipc_link_xmit() is the general link level function for message sending + * @buf: chain of buffers containing message + * @dsz: amount of user data to be sent + * @dnode: address of destination node + * @selector: a number used for deterministic link selection + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) +int tipc_link_xmit(struct sk_buff *buf, u32 dnode, u32 selector) { - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - int res = -ELINKCONG; + struct tipc_link *link = NULL; + struct tipc_node *node; + int rc = -EHOSTUNREACH; - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) - res = __tipc_link_xmit(l_ptr, buf); - else - kfree_skb(buf); - tipc_node_unlock(n_ptr); - } else { - kfree_skb(buf); + node = tipc_node_find(dnode); + if (node) { + tipc_node_lock(node); + link = node->active_links[selector & 1]; + if (link) + rc = __tipc_link_xmit(link, buf); + tipc_node_unlock(node); } - read_unlock_bh(&tipc_net_lock); - return res; + + if (link) + return rc; + + if (likely(in_own_node(dnode))) + return tipc_sk_rcv(buf); + + kfree_skb_list(buf); + return rc; } /* @@ -850,7 +824,7 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) * * Called with node locked */ -static void tipc_link_sync_xmit(struct tipc_link *l) +static void tipc_link_sync_xmit(struct tipc_link *link) { struct sk_buff *buf; struct tipc_msg *msg; @@ -860,10 +834,9 @@ static void tipc_link_sync_xmit(struct tipc_link *l) return; msg = buf_msg(buf); - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, l->addr); - msg_set_last_bcast(msg, l->owner->bclink.acked); - link_add_chain_to_outqueue(l, buf, 0); - tipc_link_push_queue(l); + tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr); + msg_set_last_bcast(msg, link->owner->bclink.acked); + __tipc_link_xmit(link, buf); } /* @@ -884,299 +857,6 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) } /* - * tipc_link_names_xmit - send name table entries to new neighbor - * - * Send routine for bulk delivery of name table messages when contact - * with a new neighbor occurs. No link congestion checking is performed - * because name table messages *must* be delivered. The messages must be - * small enough not to require fragmentation. - * Called without any locks held. - */ -void tipc_link_names_xmit(struct list_head *message_list, u32 dest) -{ - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct sk_buff *buf; - struct sk_buff *temp_buf; - - if (list_empty(message_list)) - return; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[0]; - if (l_ptr) { - /* convert circular list to linear list */ - ((struct sk_buff *)message_list->prev)->next = NULL; - link_add_chain_to_outqueue(l_ptr, - (struct sk_buff *)message_list->next, 0); - tipc_link_push_queue(l_ptr); - INIT_LIST_HEAD(message_list); - } - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - - /* discard the messages if they couldn't be sent */ - list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) { - list_del((struct list_head *)buf); - kfree_skb(buf); - } -} - -/* - * tipc_link_xmit_fast: Entry for data messages where the - * destination link is known and the header is complete, - * inclusive total message length. Very time critical. - * Link is locked. Returns user data length. - */ -static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf, - u32 *used_max_pkt) -{ - struct tipc_msg *msg = buf_msg(buf); - int res = msg_data_sz(msg); - - if (likely(!link_congested(l_ptr))) { - if (likely(msg_size(msg) <= l_ptr->max_pkt)) { - link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, - &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return res; - } - else - *used_max_pkt = l_ptr->max_pkt; - } - return __tipc_link_xmit(l_ptr, buf); /* All other cases */ -} - -/* - * tipc_link_iovec_xmit_fast: Entry for messages where the - * destination processor is known and the header is complete, - * except for total message length. - * Returns user data length or errno. - */ -int tipc_link_iovec_xmit_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) -{ - struct tipc_msg *hdr = &sender->phdr; - struct tipc_link *l_ptr; - struct sk_buff *buf; - struct tipc_node *node; - int res; - u32 selector = msg_origport(hdr) & 1; - -again: - /* - * Try building message using port's max_pkt hint. - * (Must not hold any locks while building message.) - */ - res = tipc_msg_build(hdr, msg_sect, len, sender->max_pkt, &buf); - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - return res; - - read_lock_bh(&tipc_net_lock); - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[selector]; - if (likely(l_ptr)) { - if (likely(buf)) { - res = tipc_link_xmit_fast(l_ptr, buf, - &sender->max_pkt); -exit: - tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); - return res; - } - - /* Exit if link (or bearer) is congested */ - if (link_congested(l_ptr)) { - res = link_schedule_port(l_ptr, - sender->ref, res); - goto exit; - } - - /* - * Message size exceeds max_pkt hint; update hint, - * then re-try fast path or fragment the message - */ - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); - - - if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) - goto again; - - return tipc_link_iovec_long_xmit(sender, msg_sect, - len, destaddr); - } - tipc_node_unlock(node); - } - read_unlock_bh(&tipc_net_lock); - - /* Couldn't find a link to the destination node */ - kfree_skb(buf); - tipc_port_iovec_reject(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE); - return -ENETUNREACH; -} - -/* - * tipc_link_iovec_long_xmit(): Entry for long messages where the - * destination node is known and the header is complete, - * inclusive total message length. - * Link and bearer congestion status have been checked to be ok, - * and are ignored if they change. - * - * Note that fragments do not use the full link MTU so that they won't have - * to undergo refragmentation if link changeover causes them to be sent - * over another link with an additional tunnel header added as prefix. - * (Refragmentation will still occur if the other link has a smaller MTU.) - * - * Returns user data length or errno. - */ -static int tipc_link_iovec_long_xmit(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) -{ - struct tipc_link *l_ptr; - struct tipc_node *node; - struct tipc_msg *hdr = &sender->phdr; - u32 dsz = len; - u32 max_pkt, fragm_sz, rest; - struct tipc_msg fragm_hdr; - struct sk_buff *buf, *buf_chain, *prev; - u32 fragm_crs, fragm_rest, hsz, sect_rest; - const unchar __user *sect_crs; - int curr_sect; - u32 fragm_no; - int res = 0; - -again: - fragm_no = 1; - max_pkt = sender->max_pkt - INT_H_SIZE; - /* leave room for tunnel header in case of link changeover */ - fragm_sz = max_pkt - INT_H_SIZE; - /* leave room for fragmentation header in each fragment */ - rest = dsz; - fragm_crs = 0; - fragm_rest = 0; - sect_rest = 0; - sect_crs = NULL; - curr_sect = -1; - - /* Prepare reusable fragment header */ - tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(hdr)); - msg_set_size(&fragm_hdr, max_pkt); - msg_set_fragm_no(&fragm_hdr, 1); - - /* Prepare header of first fragment */ - buf_chain = buf = tipc_buf_acquire(max_pkt); - if (!buf) - return -ENOMEM; - buf->next = NULL; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - hsz = msg_hdr_sz(hdr); - skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz); - - /* Chop up message */ - fragm_crs = INT_H_SIZE + hsz; - fragm_rest = fragm_sz - hsz; - - do { /* For all sections */ - u32 sz; - - if (!sect_rest) { - sect_rest = msg_sect[++curr_sect].iov_len; - sect_crs = msg_sect[curr_sect].iov_base; - } - - if (sect_rest < fragm_rest) - sz = sect_rest; - else - sz = fragm_rest; - - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { - res = -EFAULT; -error: - kfree_skb_list(buf_chain); - return res; - } - sect_crs += sz; - sect_rest -= sz; - fragm_crs += sz; - fragm_rest -= sz; - rest -= sz; - - if (!fragm_rest && rest) { - - /* Initiate new fragment: */ - if (rest <= fragm_sz) { - fragm_sz = rest; - msg_set_type(&fragm_hdr, LAST_FRAGMENT); - } else { - msg_set_type(&fragm_hdr, FRAGMENT); - } - msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); - msg_set_fragm_no(&fragm_hdr, ++fragm_no); - prev = buf; - buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) { - res = -ENOMEM; - goto error; - } - - buf->next = NULL; - prev->next = buf; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - fragm_crs = INT_H_SIZE; - fragm_rest = fragm_sz; - } - } while (rest > 0); - - /* - * Now we have a buffer chain. Select a link and check - * that packet size is still OK - */ - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[sender->ref & 1]; - if (!l_ptr) { - tipc_node_unlock(node); - goto reject; - } - if (l_ptr->max_pkt < max_pkt) { - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - kfree_skb_list(buf_chain); - goto again; - } - } else { -reject: - kfree_skb_list(buf_chain); - tipc_port_iovec_reject(sender, hdr, msg_sect, len, - TIPC_ERR_NO_NODE); - return -ENETUNREACH; - } - - /* Append chain of fragments to send queue & send them */ - l_ptr->long_msg_seq_no++; - link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no); - l_ptr->stats.sent_fragments += fragm_no; - l_ptr->stats.sent_fragmented++; - tipc_link_push_queue(l_ptr); - tipc_node_unlock(node); - return dsz; -} - -/* * tipc_link_push_packet: Push one unsent packet to the media */ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) @@ -1204,7 +884,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (r_q_size && buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->retransm_queue_head = mod(++r_q_head); l_ptr->retransm_queue_size = --r_q_size; l_ptr->stats.retransmitted++; @@ -1216,7 +896,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (buf) { msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); l_ptr->proto_msg_queue = NULL; @@ -1233,9 +913,10 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) if (mod(next - first) < l_ptr->queue_limit[0]) { msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, + &l_ptr->media_addr); if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); + msg_set_type(msg, BUNDLE_CLOSED); l_ptr->next_out = buf->next; return 0; } @@ -1256,33 +937,24 @@ void tipc_link_push_queue(struct tipc_link *l_ptr) } while (!res); } -static void link_reset_all(unsigned long addr) +void tipc_link_reset_all(struct tipc_node *node) { - struct tipc_node *n_ptr; char addr_string[16]; u32 i; - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find((u32)addr); - if (!n_ptr) { - read_unlock_bh(&tipc_net_lock); - return; /* node no longer exists */ - } - - tipc_node_lock(n_ptr); + tipc_node_lock(node); pr_warn("Resetting all links to %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + tipc_addr_string_fill(addr_string, node->addr)); for (i = 0; i < MAX_BEARERS; i++) { - if (n_ptr->links[i]) { - link_print(n_ptr->links[i], "Resetting link\n"); - tipc_link_reset(n_ptr->links[i]); + if (node->links[i]) { + link_print(node->links[i], "Resetting link\n"); + tipc_link_reset(node->links[i]); } } - tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); + tipc_node_unlock(node); } static void link_retransmit_failure(struct tipc_link *l_ptr, @@ -1319,10 +991,9 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr); - tipc_node_unlock(n_ptr); + tipc_bclink_set_flags(TIPC_BCLINK_RESET); l_ptr->stale_count = 0; } } @@ -1352,7 +1023,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf, msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); buf = buf->next; retransmits--; l_ptr->stats.retransmitted++; @@ -1440,14 +1111,13 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving from off-node * @head: pointer to message buffer chain - * @tb_ptr: pointer to bearer message arrived on + * @b_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. */ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) { - read_lock_bh(&tipc_net_lock); while (head) { struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -1497,14 +1167,14 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) goto unlock_discard; /* Verify that communication with node is currently allowed */ - if ((n_ptr->block_setup & WAIT_PEER_DOWN) && - msg_user(msg) == LINK_PROTOCOL && - (msg_type(msg) == RESET_MSG || - msg_type(msg) == ACTIVATE_MSG) && - !msg_redundant_link(msg)) - n_ptr->block_setup &= ~WAIT_PEER_DOWN; - - if (n_ptr->block_setup) + if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && + msg_user(msg) == LINK_PROTOCOL && + (msg_type(msg) == RESET_MSG || + msg_type(msg) == ACTIVATE_MSG) && + !msg_redundant_link(msg)) + n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN; + + if (tipc_node_blocked(n_ptr)) goto unlock_discard; /* Validate message sequence number info */ @@ -1535,11 +1205,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(!list_empty(&l_ptr->waiting_ports))) tipc_link_wakeup_ports(l_ptr, 0); - if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { - l_ptr->stats.sent_acks++; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); - } - /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { @@ -1573,69 +1238,99 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(l_ptr->oldest_deferred_in)) head = link_insert_deferred_queue(l_ptr, head); - /* Deliver packet/message to correct user: */ - if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) { - if (!tipc_link_tunnel_rcv(n_ptr, &buf)) { - tipc_node_unlock(n_ptr); - continue; - } - msg = buf_msg(buf); - } else if (msg_user(msg) == MSG_FRAGMENTER) { - int rc; - - l_ptr->stats.recv_fragments++; - rc = tipc_link_frag_rcv(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); - if (rc == LINK_REASM_COMPLETE) { - l_ptr->stats.recv_fragmented++; - msg = buf_msg(buf); - } else { - if (rc == LINK_REASM_ERROR) - tipc_link_reset(l_ptr); - tipc_node_unlock(n_ptr); - continue; - } + if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { + l_ptr->stats.sent_acks++; + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } - switch (msg_user(msg)) { - case TIPC_LOW_IMPORTANCE: - case TIPC_MEDIUM_IMPORTANCE: - case TIPC_HIGH_IMPORTANCE: - case TIPC_CRITICAL_IMPORTANCE: - tipc_node_unlock(n_ptr); - tipc_port_rcv(buf); - continue; - case MSG_BUNDLER: - l_ptr->stats.recv_bundles++; - l_ptr->stats.recv_bundled += msg_msgcnt(msg); - tipc_node_unlock(n_ptr); - tipc_link_bundle_rcv(buf); - continue; - case NAME_DISTRIBUTOR: - n_ptr->bclink.recv_permitted = true; - tipc_node_unlock(n_ptr); - tipc_named_rcv(buf); - continue; - case CONN_MANAGER: + if (tipc_link_prepare_input(l_ptr, &buf)) { tipc_node_unlock(n_ptr); - tipc_port_proto_rcv(buf); continue; - case BCAST_PROTOCOL: - tipc_link_sync_rcv(n_ptr, buf); - break; - default: - kfree_skb(buf); - break; } tipc_node_unlock(n_ptr); + msg = buf_msg(buf); + if (tipc_link_input(l_ptr, buf) != 0) + goto discard; continue; unlock_discard: tipc_node_unlock(n_ptr); discard: kfree_skb(buf); } - read_unlock_bh(&tipc_net_lock); +} + +/** + * tipc_link_prepare_input - process TIPC link messages + * + * returns nonzero if the message was consumed + * + * Node lock must be held + */ +static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) +{ + struct tipc_node *n; + struct tipc_msg *msg; + int res = -EINVAL; + + n = l->owner; + msg = buf_msg(*buf); + switch (msg_user(msg)) { + case CHANGEOVER_PROTOCOL: + if (tipc_link_tunnel_rcv(n, buf)) + res = 0; + break; + case MSG_FRAGMENTER: + l->stats.recv_fragments++; + if (tipc_buf_append(&l->reasm_buf, buf)) { + l->stats.recv_fragmented++; + res = 0; + } else if (!l->reasm_buf) { + tipc_link_reset(l); + } + break; + case MSG_BUNDLER: + l->stats.recv_bundles++; + l->stats.recv_bundled += msg_msgcnt(msg); + res = 0; + break; + case NAME_DISTRIBUTOR: + n->bclink.recv_permitted = true; + res = 0; + break; + case BCAST_PROTOCOL: + tipc_link_sync_rcv(n, *buf); + break; + default: + res = 0; + } + return res; +} +/** + * tipc_link_input - Deliver message too higher layers + */ +static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + int res = 0; + + switch (msg_user(msg)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: + tipc_sk_rcv(buf); + break; + case NAME_DISTRIBUTOR: + tipc_named_rcv(buf); + break; + case MSG_BUNDLER: + tipc_link_bundle_rcv(buf); + break; + default: + res = -EINVAL; + } + return res; } /** @@ -1747,12 +1442,12 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, return; /* Abort non-RESET send if communication with node is prohibited */ - if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG)) + if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG)) return; /* Create protocol message with "out-of-sequence" sequence number */ msg_set_type(msg, msg_typ); - msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); + msg_set_net_plane(msg, l_ptr->net_plane); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); @@ -1818,7 +1513,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); } @@ -1840,12 +1535,9 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) if (l_ptr->exp_msg_count) goto exit; - /* record unnumbered packet arrival (force mismatch on next timeout) */ - l_ptr->checkpoint--; - - if (l_ptr->b_ptr->net_plane != msg_net_plane(msg)) + if (l_ptr->net_plane != msg_net_plane(msg)) if (tipc_own_addr > msg_prevnode(msg)) - l_ptr->b_ptr->net_plane = msg_net_plane(msg); + l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -1862,7 +1554,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) * peer has lost contact -- don't allow peer's links * to reactivate before we recognize loss & clean up */ - l_ptr->owner->block_setup = WAIT_NODE_DOWN; + l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN; } link_state_event(l_ptr, RESET_MSG); @@ -1918,6 +1610,10 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) tipc_link_reset(l_ptr); /* Enforce change to take effect */ break; } + + /* Record reception; force mismatch at next timeout: */ + l_ptr->checkpoint--; + link_state_event(l_ptr, TRAFFIC_MSG_EVT); l_ptr->stats.recv_states++; if (link_reset_unknown(l_ptr)) @@ -2177,9 +1873,7 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, } if (msg_user(msg) == MSG_FRAGMENTER) { l_ptr->stats.recv_fragments++; - tipc_link_frag_rcv(&l_ptr->reasm_head, - &l_ptr->reasm_tail, - &buf); + tipc_buf_append(&l_ptr->reasm_buf, &buf); } } exit: @@ -2232,6 +1926,7 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) u32 msgcount = msg_msgcnt(buf_msg(buf)); u32 pos = INT_H_SIZE; struct sk_buff *obuf; + struct tipc_msg *omsg; while (msgcount--) { obuf = buf_extract(buf, pos); @@ -2239,129 +1934,18 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) pr_warn("Link unable to unbundle message(s)\n"); break; } - pos += align(msg_size(buf_msg(obuf))); - tipc_net_route_msg(obuf); - } - kfree_skb(buf); -} - -/* - * Fragmentation/defragmentation: - */ - -/* - * tipc_link_frag_xmit: Entry for buffers needing fragmentation. - * The buffer is complete, inclusive total message length. - * Returns user data length. - */ -static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) -{ - struct sk_buff *buf_chain = NULL; - struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain; - struct tipc_msg *inmsg = buf_msg(buf); - struct tipc_msg fragm_hdr; - u32 insize = msg_size(inmsg); - u32 dsz = msg_data_sz(inmsg); - unchar *crs = buf->data; - u32 rest = insize; - u32 pack_sz = l_ptr->max_pkt; - u32 fragm_sz = pack_sz - INT_H_SIZE; - u32 fragm_no = 0; - u32 destaddr; - - if (msg_short(inmsg)) - destaddr = l_ptr->addr; - else - destaddr = msg_destnode(inmsg); - - /* Prepare reusable fragment header: */ - tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, destaddr); - - /* Chop up message: */ - while (rest > 0) { - struct sk_buff *fragm; - - if (rest <= fragm_sz) { - fragm_sz = rest; - msg_set_type(&fragm_hdr, LAST_FRAGMENT); - } - fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (fragm == NULL) { - kfree_skb(buf); - kfree_skb_list(buf_chain); - return -ENOMEM; + omsg = buf_msg(obuf); + pos += align(msg_size(omsg)); + if (msg_isdata(omsg) || (msg_user(omsg) == CONN_MANAGER)) { + tipc_sk_rcv(obuf); + } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { + tipc_named_rcv(obuf); + } else { + pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); + kfree_skb(obuf); } - msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); - fragm_no++; - msg_set_fragm_no(&fragm_hdr, fragm_no); - skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs, - fragm_sz); - buf_chain_tail->next = fragm; - buf_chain_tail = fragm; - - rest -= fragm_sz; - crs += fragm_sz; - msg_set_type(&fragm_hdr, FRAGMENT); } kfree_skb(buf); - - /* Append chain of fragments to send queue & send them */ - l_ptr->long_msg_seq_no++; - link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no); - l_ptr->stats.sent_fragments += fragm_no; - l_ptr->stats.sent_fragmented++; - tipc_link_push_queue(l_ptr); - - return dsz; -} - -/* tipc_link_frag_rcv(): Called with node lock on. Returns - * the reassembled buffer if message is complete. - */ -int tipc_link_frag_rcv(struct sk_buff **head, struct sk_buff **tail, - struct sk_buff **fbuf) -{ - struct sk_buff *frag = *fbuf; - struct tipc_msg *msg = buf_msg(frag); - u32 fragid = msg_type(msg); - bool headstolen; - int delta; - - skb_pull(frag, msg_hdr_sz(msg)); - if (fragid == FIRST_FRAGMENT) { - if (*head || skb_unclone(frag, GFP_ATOMIC)) - goto out_free; - *head = frag; - skb_frag_list_init(*head); - *fbuf = NULL; - return 0; - } else if (*head && - skb_try_coalesce(*head, frag, &headstolen, &delta)) { - kfree_skb_partial(frag, headstolen); - } else { - if (!*head) - goto out_free; - if (!skb_has_frag_list(*head)) - skb_shinfo(*head)->frag_list = frag; - else - (*tail)->next = frag; - *tail = frag; - (*head)->truesize += frag->truesize; - } - if (fragid == LAST_FRAGMENT) { - *fbuf = *head; - *tail = *head = NULL; - return LINK_REASM_COMPLETE; - } - *fbuf = NULL; - return 0; -out_free: - pr_warn_ratelimited("Link unable to reassemble fragmented message\n"); - kfree_skb(*fbuf); - *fbuf = NULL; - return LINK_REASM_ERROR; } static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) @@ -2397,8 +1981,6 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) /* tipc_link_find_owner - locate owner node of link by link's name * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. - * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted; - * this also prevents link deletion. * * Returns pointer to node owning the link, or 0 if no matching link is found. */ @@ -2460,7 +2042,7 @@ static int link_value_is_valid(u16 cmd, u32 new_value) * @new_value: new value of link, bearer, or media setting * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) * - * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted. + * Caller must hold RTNL lock to ensure link/bearer/media is not deleted. * * Returns 0 if value updated and negative value on error. */ @@ -2566,9 +2148,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space " (cannot change setting on broadcast link)"); } - read_lock_bh(&tipc_net_lock); res = link_cmd_set_value(args->name, new_value, cmd); - read_unlock_bh(&tipc_net_lock); if (res) return tipc_cfg_reply_error_string("cannot change link setting"); @@ -2602,22 +2182,18 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } - read_lock_bh(&tipc_net_lock); node = tipc_link_find_owner(link_name, &bearer_id); - if (!node) { - read_unlock_bh(&tipc_net_lock); + if (!node) return tipc_cfg_reply_error_string("link not found"); - } + tipc_node_lock(node); l_ptr = node->links[bearer_id]; if (!l_ptr) { tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string("link not found"); } link_reset_statistics(l_ptr); tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } @@ -2650,18 +2226,15 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) if (!strcmp(name, tipc_bclink_name)) return tipc_bclink_stats(buf, buf_size); - read_lock_bh(&tipc_net_lock); node = tipc_link_find_owner(name, &bearer_id); - if (!node) { - read_unlock_bh(&tipc_net_lock); + if (!node) return 0; - } + tipc_node_lock(node); l = node->links[bearer_id]; if (!l) { tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return 0; } @@ -2727,7 +2300,6 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) (s->accu_queue_sz / s->queue_sz_counts) : 0); tipc_node_unlock(node); - read_unlock_bh(&tipc_net_lock); return ret; } @@ -2778,7 +2350,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) if (dest == tipc_own_addr) return MAX_MSG_SIZE; - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(dest); if (n_ptr) { tipc_node_lock(n_ptr); @@ -2787,13 +2358,18 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) res = l_ptr->max_pkt; tipc_node_unlock(n_ptr); } - read_unlock_bh(&tipc_net_lock); return res; } static void link_print(struct tipc_link *l_ptr, const char *str) { - pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + if (b_ptr) + pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); + rcu_read_unlock(); if (link_working_unknown(l_ptr)) pr_cont(":WU\n"); diff --git a/net/tipc/link.h b/net/tipc/link.h index 8c0b49b5b2ee..782983ccd323 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -40,11 +40,6 @@ #include "msg.h" #include "node.h" -/* Link reassembly status codes - */ -#define LINK_REASM_ERROR -1 -#define LINK_REASM_COMPLETE 1 - /* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 @@ -107,7 +102,7 @@ struct tipc_stats { * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint - * @b_ptr: pointer to bearer used by link + * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] * @continuity_interval: link continuity testing interval [in ms] * @abort_limit: # of unacknowledged continuity probes needed to reset link @@ -116,6 +111,7 @@ struct tipc_stats { * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') * @queue_limit: outbound message queue congestion thresholds (indexed by user) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset @@ -139,8 +135,7 @@ struct tipc_stats { * @next_out: ptr to first unsent outbound message in queue * @waiting_ports: linked list of ports waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages - * @reasm_head: list head of partially reassembled inbound message fragments - * @reasm_tail: last fragment received + * @reasm_buf: head of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity */ struct tipc_link { @@ -155,7 +150,7 @@ struct tipc_link { u32 checkpoint; u32 peer_session; u32 peer_bearer_id; - struct tipc_bearer *b_ptr; + u32 bearer_id; u32 tolerance; u32 continuity_interval; u32 abort_limit; @@ -167,6 +162,7 @@ struct tipc_link { } proto_msg; struct tipc_msg *pmsg; u32 priority; + char net_plane; u32 queue_limit[15]; /* queue_limit[0]==window limit */ /* Changeover */ @@ -202,8 +198,7 @@ struct tipc_link { /* Fragmentation/reassembly */ u32 long_msg_seq_no; - struct sk_buff *reasm_head; - struct sk_buff *reasm_tail; + struct sk_buff *reasm_buf; /* Statistics */ struct tipc_stats stats; @@ -228,20 +223,13 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); +void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); -void tipc_link_names_xmit(struct list_head *message_list, u32 dest); -int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); -int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); +int __tipc_link_xmit(struct tipc_link *link, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -int tipc_link_iovec_xmit_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destnode); void tipc_link_bundle_rcv(struct sk_buff *buf); -int tipc_link_frag_rcv(struct sk_buff **reasm_head, - struct sk_buff **reasm_tail, - struct sk_buff **fbuf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_queue(struct tipc_link *l_ptr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e525f8ce1dee..9680be6d388a 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -1,7 +1,7 @@ /* * net/tipc/msg.c: TIPC message header routines * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -36,21 +36,16 @@ #include "core.h" #include "msg.h" +#include "addr.h" +#include "name_table.h" -u32 tipc_msg_tot_importance(struct tipc_msg *m) +#define MAX_FORWARD_SIZE 1024 + +static unsigned int align(unsigned int i) { - if (likely(msg_isdata(m))) { - if (likely(msg_orignode(m) == tipc_own_addr)) - return msg_importance(m); - return msg_importance(m) + 4; - } - if ((msg_user(m) == MSG_FRAGMENTER) && - (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); + return (i + 3) & ~3u; } - void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode) { @@ -65,37 +60,373 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, msg_set_destnode(m, destnode); } -/** - * tipc_msg_build - create message using specified header and data - * - * Note: Caller must not hold any locks in case copy_from_user() is interrupted! - * - * Returns message data size or errno +/* tipc_buf_append(): Append a buffer to the fragment list of another buffer + * @*headbuf: in: NULL for first frag, otherwise value returned from prev call + * out: set when successful non-complete reassembly, otherwise NULL + * @*buf: in: the buffer to append. Always defined + * out: head buf after sucessful complete reassembly, otherwise NULL + * Returns 1 when reassembly complete, otherwise 0 */ -int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - unsigned int len, int max_size, struct sk_buff **buf) +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) { - int dsz, sz, hsz; - unsigned char *to; - - dsz = len; - hsz = msg_hdr_sz(hdr); - sz = hsz + dsz; - msg_set_size(hdr, sz); - if (unlikely(sz > max_size)) { + struct sk_buff *head = *headbuf; + struct sk_buff *frag = *buf; + struct sk_buff *tail; + struct tipc_msg *msg; + u32 fragid; + int delta; + bool headstolen; + + if (!frag) + goto err; + + msg = buf_msg(frag); + fragid = msg_type(msg); + frag->next = NULL; + skb_pull(frag, msg_hdr_sz(msg)); + + if (fragid == FIRST_FRAGMENT) { + if (unlikely(head)) + goto err; + if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + goto err; + head = *headbuf = frag; + skb_frag_list_init(head); + TIPC_SKB_CB(head)->tail = NULL; *buf = NULL; - return dsz; + return 0; } - *buf = tipc_buf_acquire(sz); - if (!(*buf)) - return -ENOMEM; - skb_copy_to_linear_data(*buf, hdr, hsz); - to = (*buf)->data + hsz; - if (len && memcpy_fromiovecend(to, msg_sect, 0, dsz)) { - kfree_skb(*buf); - *buf = NULL; - return -EFAULT; + if (!head) + goto err; + + if (skb_try_coalesce(head, frag, &headstolen, &delta)) { + kfree_skb_partial(frag, headstolen); + } else { + tail = TIPC_SKB_CB(head)->tail; + if (!skb_has_frag_list(head)) + skb_shinfo(head)->frag_list = frag; + else + tail->next = frag; + head->truesize += frag->truesize; + head->data_len += frag->len; + head->len += frag->len; + TIPC_SKB_CB(head)->tail = frag; + } + + if (fragid == LAST_FRAGMENT) { + *buf = head; + TIPC_SKB_CB(head)->tail = NULL; + *headbuf = NULL; + return 1; + } + *buf = NULL; + return 0; + +err: + pr_warn_ratelimited("Unable to build fragment list\n"); + kfree_skb(*buf); + kfree_skb(*headbuf); + *buf = *headbuf = NULL; + return 0; +} + + +/** + * tipc_msg_build - create buffer chain containing specified header and data + * @mhdr: Message header, to be prepended to data + * @iov: User data + * @offset: Posision in iov to start copying from + * @dsz: Total length of user data + * @pktmax: Max packet size that can be used + * @chain: Buffer or chain of buffers to be returned to caller + * Returns message data size or errno: -ENOMEM, -EFAULT + */ +int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int pktmax , struct sk_buff **chain) +{ + int mhsz = msg_hdr_sz(mhdr); + int msz = mhsz + dsz; + int pktno = 1; + int pktsz; + int pktrem = pktmax; + int drem = dsz; + struct tipc_msg pkthdr; + struct sk_buff *buf, *prev; + char *pktpos; + int rc; + + msg_set_size(mhdr, msz); + + /* No fragmentation needed? */ + if (likely(msz <= pktmax)) { + buf = tipc_buf_acquire(msz); + *chain = buf; + if (unlikely(!buf)) + return -ENOMEM; + skb_copy_to_linear_data(buf, mhdr, mhsz); + pktpos = buf->data + mhsz; + if (!dsz || !memcpy_fromiovecend(pktpos, iov, offset, dsz)) + return dsz; + rc = -EFAULT; + goto error; } + + /* Prepare reusable fragment header */ + tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, + INT_H_SIZE, msg_destnode(mhdr)); + msg_set_size(&pkthdr, pktmax); + msg_set_fragm_no(&pkthdr, pktno); + + /* Prepare first fragment */ + *chain = buf = tipc_buf_acquire(pktmax); + if (!buf) + return -ENOMEM; + pktpos = buf->data; + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos += INT_H_SIZE; + pktrem -= INT_H_SIZE; + skb_copy_to_linear_data_offset(buf, INT_H_SIZE, mhdr, mhsz); + pktpos += mhsz; + pktrem -= mhsz; + + do { + if (drem < pktrem) + pktrem = drem; + + if (memcpy_fromiovecend(pktpos, iov, offset, pktrem)) { + rc = -EFAULT; + goto error; + } + drem -= pktrem; + offset += pktrem; + + if (!drem) + break; + + /* Prepare new fragment: */ + if (drem < (pktmax - INT_H_SIZE)) + pktsz = drem + INT_H_SIZE; + else + pktsz = pktmax; + prev = buf; + buf = tipc_buf_acquire(pktsz); + if (!buf) { + rc = -ENOMEM; + goto error; + } + prev->next = buf; + msg_set_type(&pkthdr, FRAGMENT); + msg_set_size(&pkthdr, pktsz); + msg_set_fragm_no(&pkthdr, ++pktno); + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos = buf->data + INT_H_SIZE; + pktrem = pktsz - INT_H_SIZE; + + } while (1); + + msg_set_type(buf_msg(buf), LAST_FRAGMENT); return dsz; +error: + kfree_skb_list(*chain); + *chain = NULL; + return rc; +} + +/** + * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one + * @bbuf: the existing buffer ("bundle") + * @buf: buffer to be appended + * @mtu: max allowable size for the bundle buffer + * Consumes buffer if successful + * Returns true if bundling could be performed, otherwise false + */ +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu) +{ + struct tipc_msg *bmsg = buf_msg(bbuf); + struct tipc_msg *msg = buf_msg(buf); + unsigned int bsz = msg_size(bmsg); + unsigned int msz = msg_size(msg); + u32 start = align(bsz); + u32 max = mtu - INT_H_SIZE; + u32 pad = start - bsz; + + if (likely(msg_user(msg) == MSG_FRAGMENTER)) + return false; + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) + return false; + if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) + return false; + if (likely(msg_user(bmsg) != MSG_BUNDLER)) + return false; + if (likely(msg_type(bmsg) != BUNDLE_OPEN)) + return false; + if (unlikely(skb_tailroom(bbuf) < (pad + msz))) + return false; + if (unlikely(max < (start + msz))) + return false; + + skb_put(bbuf, pad + msz); + skb_copy_to_linear_data_offset(bbuf, start, buf->data, msz); + msg_set_size(bmsg, start + msz); + msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + bbuf->next = buf->next; + kfree_skb(buf); + return true; +} + +/** + * tipc_msg_make_bundle(): Create bundle buf and append message to its tail + * @buf: buffer to be appended and replaced + * @mtu: max allowable size for the bundle buffer, inclusive header + * @dnode: destination node for message. (Not always present in header) + * Replaces buffer if successful + * Returns true if sucess, otherwise false + */ +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode) +{ + struct sk_buff *bbuf; + struct tipc_msg *bmsg; + struct tipc_msg *msg = buf_msg(*buf); + u32 msz = msg_size(msg); + u32 max = mtu - INT_H_SIZE; + + if (msg_user(msg) == MSG_FRAGMENTER) + return false; + if (msg_user(msg) == CHANGEOVER_PROTOCOL) + return false; + if (msg_user(msg) == BCAST_PROTOCOL) + return false; + if (msz > (max / 2)) + return false; + + bbuf = tipc_buf_acquire(max); + if (!bbuf) + return false; + + skb_trim(bbuf, INT_H_SIZE); + bmsg = buf_msg(bbuf); + tipc_msg_init(bmsg, MSG_BUNDLER, BUNDLE_OPEN, INT_H_SIZE, dnode); + msg_set_seqno(bmsg, msg_seqno(msg)); + msg_set_ack(bmsg, msg_ack(msg)); + msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); + bbuf->next = (*buf)->next; + tipc_msg_bundle(bbuf, *buf, mtu); + *buf = bbuf; + return true; +} + +/** + * tipc_msg_reverse(): swap source and destination addresses and add error code + * @buf: buffer containing message to be reversed + * @dnode: return value: node where to send message after reversal + * @err: error code to be set in message + * Consumes buffer if failure + * Returns true if success, otherwise false + */ +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +{ + struct tipc_msg *msg = buf_msg(buf); + uint imp = msg_importance(msg); + struct tipc_msg ohdr; + uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); + + if (skb_linearize(buf)) + goto exit; + if (msg_dest_droppable(msg)) + goto exit; + if (msg_errcode(msg)) + goto exit; + + memcpy(&ohdr, msg, msg_hdr_sz(msg)); + imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); + if (msg_isdata(msg)) + msg_set_importance(msg, imp); + msg_set_errcode(msg, err); + msg_set_origport(msg, msg_destport(&ohdr)); + msg_set_destport(msg, msg_origport(&ohdr)); + msg_set_prevnode(msg, tipc_own_addr); + if (!msg_short(msg)) { + msg_set_orignode(msg, msg_destnode(&ohdr)); + msg_set_destnode(msg, msg_orignode(&ohdr)); + } + msg_set_size(msg, msg_hdr_sz(msg) + rdsz); + skb_trim(buf, msg_size(msg)); + skb_orphan(buf); + *dnode = msg_orignode(&ohdr); + return true; +exit: + kfree_skb(buf); + return false; +} + +/** + * tipc_msg_eval: determine fate of message that found no destination + * @buf: the buffer containing the message. + * @dnode: return value: next-hop node, if message to be forwarded + * @err: error code to use, if message to be rejected + * + * Does not consume buffer + * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error + * code if message to be rejected + */ +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +{ + struct tipc_msg *msg = buf_msg(buf); + u32 dport; + + if (msg_type(msg) != TIPC_NAMED_MSG) + return -TIPC_ERR_NO_PORT; + if (skb_linearize(buf)) + return -TIPC_ERR_NO_NAME; + if (msg_data_sz(msg) > MAX_FORWARD_SIZE) + return -TIPC_ERR_NO_NAME; + if (msg_reroute_cnt(msg) > 0) + return -TIPC_ERR_NO_NAME; + + *dnode = addr_domain(msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(msg_nametype(msg), + msg_nameinst(msg), + dnode); + if (!dport) + return -TIPC_ERR_NO_NAME; + msg_incr_reroute_cnt(msg); + msg_set_destnode(msg, *dnode); + msg_set_destport(msg, dport); + return TIPC_OK; +} + +/* tipc_msg_reassemble() - clone a buffer chain of fragments and + * reassemble the clones into one message + */ +struct sk_buff *tipc_msg_reassemble(struct sk_buff *chain) +{ + struct sk_buff *buf = chain; + struct sk_buff *frag = buf; + struct sk_buff *head = NULL; + int hdr_sz; + + /* Copy header if single buffer */ + if (!buf->next) { + hdr_sz = skb_headroom(buf) + msg_hdr_sz(buf_msg(buf)); + return __pskb_copy(buf, hdr_sz, GFP_ATOMIC); + } + + /* Clone all fragments and reassemble */ + while (buf) { + frag = skb_clone(buf, GFP_ATOMIC); + if (!frag) + goto error; + frag->next = NULL; + if (tipc_buf_append(&head, &frag)) + break; + if (!head) + goto error; + buf = buf->next; + } + return frag; +error: + pr_warn("Failed do clone local mcast rcv buffer\n"); + kfree_skb(head); + return NULL; } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 76d1269b9443..462fa194a6af 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, Ericsson AB + * Copyright (c) 2000-2007, 2014, Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -463,6 +463,11 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define FRAGMENT 1 #define LAST_FRAGMENT 2 +/* Bundling protocol message types + */ +#define BUNDLE_OPEN 0 +#define BUNDLE_CLOSED 1 + /* * Link management protocol message types */ @@ -706,9 +711,36 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -u32 tipc_msg_tot_importance(struct tipc_msg *m); +static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) +{ + if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) + return msg_importance(msg_get_wrapped(m)); + return msg_importance(m); +} + +static inline u32 msg_tot_origport(struct tipc_msg *m) +{ + if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) + return msg_origport(msg_get_wrapped(m)); + return msg_origport(m); +} + +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); + +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); + void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); -int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - unsigned int len, int max_size, struct sk_buff **buf); + +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); + +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu); + +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode); + +int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int mtu , struct sk_buff **chain); + +struct sk_buff *tipc_msg_reassemble(struct sk_buff *chain); + #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index aff8041dc157..dcc15bcd5692 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -38,34 +38,6 @@ #include "link.h" #include "name_distr.h" -#define ITEM_SIZE sizeof(struct distr_item) - -/** - * struct distr_item - publication info distributed to other nodes - * @type: name sequence type - * @lower: name sequence lower bound - * @upper: name sequence upper bound - * @ref: publishing port reference - * @key: publication key - * - * ===> All fields are stored in network byte order. <=== - * - * First 3 fields identify (name or) name sequence being published. - * Reference field uniquely identifies port that published name sequence. - * Key field uniquely identifies publication, in the event a port has - * multiple publications of the same name sequence. - * - * Note: There is no field that identifies the publishing node because it is - * the same for all items contained within a publication message. - */ -struct distr_item { - __be32 type; - __be32 lower; - __be32 upper; - __be32 ref; - __be32 key; -}; - /** * struct publ_list - list of publications made by this node * @list: circular list of publications @@ -127,26 +99,24 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) return buf; } -static void named_cluster_distribute(struct sk_buff *buf) +void named_cluster_distribute(struct sk_buff *buf) { - struct sk_buff *buf_copy; - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; + struct sk_buff *obuf; + struct tipc_node *node; + u32 dnode; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - spin_lock_bh(&n_ptr->lock); - l_ptr = n_ptr->active_links[n_ptr->addr & 1]; - if (l_ptr) { - buf_copy = skb_copy(buf, GFP_ATOMIC); - if (!buf_copy) { - spin_unlock_bh(&n_ptr->lock); - break; - } - msg_set_destnode(buf_msg(buf_copy), n_ptr->addr); - __tipc_link_xmit(l_ptr, buf_copy); - } - spin_unlock_bh(&n_ptr->lock); + list_for_each_entry_rcu(node, &tipc_node_list, list) { + dnode = node->addr; + if (in_own_node(dnode)) + continue; + if (!tipc_node_active_links(node)) + continue; + obuf = skb_copy(buf, GFP_ATOMIC); + if (!obuf) + break; + msg_set_destnode(buf_msg(obuf), dnode); + tipc_link_xmit(obuf, dnode, dnode); } rcu_read_unlock(); @@ -156,7 +126,7 @@ static void named_cluster_distribute(struct sk_buff *buf) /** * tipc_named_publish - tell other nodes about a new publication by this node */ -void tipc_named_publish(struct publication *publ) +struct sk_buff *tipc_named_publish(struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -165,23 +135,23 @@ void tipc_named_publish(struct publication *publ) publ_lists[publ->scope]->size++; if (publ->scope == TIPC_NODE_SCOPE) - return; + return NULL; buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); if (!buf) { pr_warn("Publication distribution failure\n"); - return; + return NULL; } item = (struct distr_item *)msg_data(buf_msg(buf)); publ_to_item(item, publ); - named_cluster_distribute(buf); + return buf; } /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node */ -void tipc_named_withdraw(struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -190,47 +160,57 @@ void tipc_named_withdraw(struct publication *publ) publ_lists[publ->scope]->size--; if (publ->scope == TIPC_NODE_SCOPE) - return; + return NULL; buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { pr_warn("Withdrawal distribution failure\n"); - return; + return NULL; } item = (struct distr_item *)msg_data(buf_msg(buf)); publ_to_item(item, publ); - named_cluster_distribute(buf); + return buf; } -/* +/** * named_distribute - prepare name info for bulk distribution to another node + * @msg_list: list of messages (buffers) to be returned from this function + * @dnode: node to be updated + * @pls: linked list of publication items to be packed into buffer chain */ -static void named_distribute(struct list_head *message_list, u32 node, - struct publ_list *pls, u32 max_item_buf) +static void named_distribute(struct list_head *msg_list, u32 dnode, + struct publ_list *pls) { struct publication *publ; struct sk_buff *buf = NULL; struct distr_item *item = NULL; - u32 left = 0; - u32 rest = pls->size * ITEM_SIZE; + uint dsz = pls->size * ITEM_SIZE; + uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE; + uint rem = dsz; + uint msg_rem = 0; list_for_each_entry(publ, &pls->list, local_list) { + /* Prepare next buffer: */ if (!buf) { - left = (rest <= max_item_buf) ? rest : max_item_buf; - rest -= left; - buf = named_prepare_buf(PUBLICATION, left, node); + msg_rem = min_t(uint, rem, msg_dsz); + rem -= msg_rem; + buf = named_prepare_buf(PUBLICATION, msg_rem, dnode); if (!buf) { pr_warn("Bulk publication failure\n"); return; } item = (struct distr_item *)msg_data(buf_msg(buf)); } + + /* Pack publication into message: */ publ_to_item(item, publ); item++; - left -= ITEM_SIZE; - if (!left) { - list_add_tail((struct list_head *)buf, message_list); + msg_rem -= ITEM_SIZE; + + /* Append full buffer to list: */ + if (!msg_rem) { + list_add_tail((struct list_head *)buf, msg_list); buf = NULL; } } @@ -239,38 +219,20 @@ static void named_distribute(struct list_head *message_list, u32 node, /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(unsigned long nodearg) +void tipc_named_node_up(u32 dnode) { - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct list_head message_list; - u32 node = (u32)nodearg; - u32 max_item_buf = 0; - - /* compute maximum amount of publication data to send per message */ - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(node); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[0]; - if (l_ptr) - max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) / - ITEM_SIZE) * ITEM_SIZE; - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - if (!max_item_buf) - return; - - /* create list of publication messages, then send them as a unit */ - INIT_LIST_HEAD(&message_list); + LIST_HEAD(msg_list); + struct sk_buff *buf_chain; read_lock_bh(&tipc_nametbl_lock); - named_distribute(&message_list, node, &publ_cluster, max_item_buf); - named_distribute(&message_list, node, &publ_zone, max_item_buf); + named_distribute(&msg_list, dnode, &publ_cluster); + named_distribute(&msg_list, dnode, &publ_zone); read_unlock_bh(&tipc_nametbl_lock); - tipc_link_names_xmit(&message_list, node); + /* Convert circular list to linear list and send: */ + buf_chain = (struct sk_buff *)msg_list.next; + ((struct sk_buff *)msg_list.prev)->next = NULL; + tipc_link_xmit(buf_chain, dnode, dnode); } /** diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 9b312ccfd43e..8afe32b7fc9a 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -39,9 +39,38 @@ #include "name_table.h" -void tipc_named_publish(struct publication *publ); -void tipc_named_withdraw(struct publication *publ); -void tipc_named_node_up(unsigned long node); +#define ITEM_SIZE sizeof(struct distr_item) + +/** + * struct distr_item - publication info distributed to other nodes + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @ref: publishing port reference + * @key: publication key + * + * ===> All fields are stored in network byte order. <=== + * + * First 3 fields identify (name or) name sequence being published. + * Reference field uniquely identifies port that published name sequence. + * Key field uniquely identifies publication, in the event a port has + * multiple publications of the same name sequence. + * + * Note: There is no field that identifies the publishing node because it is + * the same for all items contained within a publication message. + */ +struct distr_item { + __be32 type; + __be32 lower; + __be32 upper; + __be32 ref; + __be32 key; +}; + +struct sk_buff *tipc_named_publish(struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct publication *publ); +void named_cluster_distribute(struct sk_buff *buf); +void tipc_named_node_up(u32 dnode); void tipc_named_rcv(struct sk_buff *buf); void tipc_named_reinit(void); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 042e8e3cabc0..9d7d37d95187 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -664,6 +664,7 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, u32 scope, u32 port_ref, u32 key) { struct publication *publ; + struct sk_buff *buf = NULL; if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", @@ -676,9 +677,12 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, tipc_own_addr, port_ref, key); if (likely(publ)) { table.local_publ_count++; - tipc_named_publish(publ); + buf = tipc_named_publish(publ); } write_unlock_bh(&tipc_nametbl_lock); + + if (buf) + named_cluster_distribute(buf); return publ; } @@ -688,15 +692,19 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) { struct publication *publ; + struct sk_buff *buf; write_lock_bh(&tipc_nametbl_lock); publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); if (likely(publ)) { table.local_publ_count--; - tipc_named_withdraw(publ); + buf = tipc_named_withdraw(publ); write_unlock_bh(&tipc_nametbl_lock); list_del_init(&publ->pport_list); kfree(publ); + + if (buf) + named_cluster_distribute(buf); return 1; } write_unlock_bh(&tipc_nametbl_lock); @@ -961,6 +969,7 @@ static void tipc_purge_publications(struct name_seq *seq) list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); + kfree(publ); } } @@ -982,7 +991,6 @@ void tipc_nametbl_stop(void) hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { tipc_purge_publications(seq); } - continue; } kfree(table.types); table.types = NULL; diff --git a/net/tipc/net.c b/net/tipc/net.c index 4c564eb69e1a..7fcc94998fea 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -1,7 +1,7 @@ /* * net/tipc/net.c: TIPC network routing code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -39,45 +39,41 @@ #include "name_distr.h" #include "subscr.h" #include "port.h" +#include "socket.h" #include "node.h" #include "config.h" /* * The TIPC locking policy is designed to ensure a very fine locking * granularity, permitting complete parallel access to individual - * port and node/link instances. The code consists of three major + * port and node/link instances. The code consists of four major * locking domains, each protected with their own disjunct set of locks. * - * 1: The routing hierarchy. - * Comprises the structures 'zone', 'cluster', 'node', 'link' - * and 'bearer'. The whole hierarchy is protected by a big - * read/write lock, tipc_net_lock, to enssure that nothing is added - * or removed while code is accessing any of these structures. - * This layer must not be called from the two others while they - * hold any of their own locks. - * Neither must it itself do any upcalls to the other two before - * it has released tipc_net_lock and other protective locks. + * 1: The bearer level. + * RTNL lock is used to serialize the process of configuring bearer + * on update side, and RCU lock is applied on read side to make + * bearer instance valid on both paths of message transmission and + * reception. * - * Within the tipc_net_lock domain there are two sub-domains;'node' and - * 'bearer', where local write operations are permitted, - * provided that those are protected by individual spin_locks - * per instance. Code holding tipc_net_lock(read) and a node spin_lock - * is permitted to poke around in both the node itself and its - * subordinate links. I.e, it can update link counters and queues, - * change link state, send protocol messages, and alter the - * "active_links" array in the node; but it can _not_ remove a link - * or a node from the overall structure. - * Correspondingly, individual bearers may change status within a - * tipc_net_lock(read), protected by an individual spin_lock ber bearer - * instance, but it needs tipc_net_lock(write) to remove/add any bearers. + * 2: The node and link level. + * All node instances are saved into two tipc_node_list and node_htable + * lists. The two lists are protected by node_list_lock on write side, + * and they are guarded with RCU lock on read side. Especially node + * instance is destroyed only when TIPC module is removed, and we can + * confirm that there has no any user who is accessing the node at the + * moment. Therefore, Except for iterating the two lists within RCU + * protection, it's no needed to hold RCU that we access node instance + * in other places. * + * In addition, all members in node structure including link instances + * are protected by node spin lock. * - * 2: The transport level of the protocol. - * This consists of the structures port, (and its user level - * representations, such as user_port and tipc_sock), reference and - * tipc_user (port.c, reg.c, socket.c). + * 3: The transport level of the protocol. + * This consists of the structures port, (and its user level + * representations, such as user_port and tipc_sock), reference and + * tipc_user (port.c, reg.c, socket.c). * - * This layer has four different locks: + * This layer has four different locks: * - The tipc_port spin_lock. This is protecting each port instance * from parallel data access and removal. Since we can not place * this lock in the port itself, it has been placed in the @@ -96,7 +92,7 @@ * There are two such lists; 'port_list', which is used for management, * and 'wait_list', which is used to queue ports during congestion. * - * 3: The name table (name_table.c, name_distr.c, subscription.c) + * 4: The name table (name_table.c, name_distr.c, subscription.c) * - There is one big read/write-lock (tipc_nametbl_lock) protecting the * overall name table structure. Nothing must be added/removed to * this structure without holding write access to it. @@ -108,85 +104,25 @@ * - A local spin_lock protecting the queue of subscriber events. */ -DEFINE_RWLOCK(tipc_net_lock); - -static void net_route_named_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - u32 dnode; - u32 dport; - - if (!msg_named(msg)) { - kfree_skb(buf); - return; - } - - dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode); - if (dport) { - msg_set_destnode(msg, dnode); - msg_set_destport(msg, dport); - tipc_net_route_msg(buf); - return; - } - tipc_reject_msg(buf, TIPC_ERR_NO_NAME); -} - -void tipc_net_route_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg; - u32 dnode; - - if (!buf) - return; - msg = buf_msg(buf); - - /* Handle message for this node */ - dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg); - if (tipc_in_scope(dnode, tipc_own_addr)) { - if (msg_isdata(msg)) { - if (msg_mcast(msg)) - tipc_port_mcast_rcv(buf, NULL); - else if (msg_destport(msg)) - tipc_port_rcv(buf); - else - net_route_named_msg(buf); - return; - } - switch (msg_user(msg)) { - case NAME_DISTRIBUTOR: - tipc_named_rcv(buf); - break; - case CONN_MANAGER: - tipc_port_proto_rcv(buf); - break; - default: - kfree_skb(buf); - } - return; - } - - /* Handle message for another node */ - skb_trim(buf, msg_size(msg)); - tipc_link_xmit(buf, dnode, msg_link_selector(msg)); -} - -void tipc_net_start(u32 addr) +int tipc_net_start(u32 addr) { char addr_string[16]; + int res; - write_lock_bh(&tipc_net_lock); tipc_own_addr = addr; tipc_named_reinit(); tipc_port_reinit(); - tipc_bclink_init(); - write_unlock_bh(&tipc_net_lock); + res = tipc_bclink_init(); + if (res) + return res; tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, TIPC_ZONE_SCOPE, 0, tipc_own_addr); + pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + return 0; } void tipc_net_stop(void) @@ -195,11 +131,11 @@ void tipc_net_stop(void) return; tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); - write_lock_bh(&tipc_net_lock); + rtnl_lock(); tipc_bearer_stop(); tipc_bclink_stop(); tipc_node_stop(); - write_unlock_bh(&tipc_net_lock); + rtnl_unlock(); pr_info("Left network mode\n"); } diff --git a/net/tipc/net.h b/net/tipc/net.h index 079daadb3f72..59ef3388be2c 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,11 +37,7 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -extern rwlock_t tipc_net_lock; - -void tipc_net_route_msg(struct sk_buff *buf); - -void tipc_net_start(u32 addr); +int tipc_net_start(u32 addr); void tipc_net_stop(void); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3aaf73de9e2d..ad844d365340 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -47,7 +47,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; - if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) + if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN))) cmd = TIPC_CMD_NOT_NET_ADMIN; else cmd = req_userhdr->cmd; diff --git a/net/tipc/node.c b/net/tipc/node.c index 1d3a4999a70f..f7069299943f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1,7 +1,7 @@ /* * net/tipc/node.c: TIPC node management routines * - * Copyright (c) 2000-2006, 2012 Ericsson AB + * Copyright (c) 2000-2006, 2012-2014, Ericsson AB * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. * @@ -108,7 +108,7 @@ struct tipc_node *tipc_node_create(u32 addr) break; } list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->block_setup = WAIT_PEER_DOWN; + n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; tipc_num_nodes++; @@ -144,30 +144,36 @@ void tipc_node_stop(void) void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active = &n_ptr->active_links[0]; + u32 addr = n_ptr->addr; n_ptr->working_links++; - + tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, + l_ptr->bearer_id, addr); pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); if (!active[0]) { active[0] = active[1] = l_ptr; node_established_contact(n_ptr); - return; + goto exit; } if (l_ptr->priority < active[0]->priority) { pr_info("New link <%s> becomes standby\n", l_ptr->name); - return; + goto exit; } tipc_link_dup_queue_xmit(active[0], l_ptr); if (l_ptr->priority == active[0]->priority) { active[0] = l_ptr; - return; + goto exit; } pr_info("Old link <%s> becomes standby\n", active[0]->name); if (active[1] != active[0]) pr_info("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; +exit: + /* Leave room for changeover header when returning 'mtu' to users: */ + n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; } /** @@ -203,16 +209,18 @@ static void node_select_active_links(struct tipc_node *n_ptr) void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active; + u32 addr = n_ptr->addr; n_ptr->working_links--; + tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr); if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); return; } pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->b_ptr->net_plane); + l_ptr->name, l_ptr->net_plane); active = &n_ptr->active_links[0]; if (active[0] == l_ptr) @@ -225,6 +233,19 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) tipc_link_failover_send_queue(l_ptr); else node_lost_contact(n_ptr); + + /* Leave room for changeover header when returning 'mtu' to users: */ + if (active[0]) { + n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + return; + } + + /* Loopback link went down? No fragmentation needed from now on. */ + if (n_ptr->addr == tipc_own_addr) { + n_ptr->act_mtus[0] = MAX_MSG_SIZE; + n_ptr->act_mtus[1] = MAX_MSG_SIZE; + } } int tipc_node_active_links(struct tipc_node *n_ptr) @@ -239,7 +260,7 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; + n_ptr->links[l_ptr->bearer_id] = l_ptr; spin_lock_bh(&node_list_lock); tipc_num_links++; spin_unlock_bh(&node_list_lock); @@ -263,26 +284,12 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) static void node_established_contact(struct tipc_node *n_ptr) { - tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); + n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; n_ptr->bclink.oos_state = 0; n_ptr->bclink.acked = tipc_bclink_get_last_sent(); tipc_bclink_add_node(n_ptr->addr); } -static void node_name_purge_complete(unsigned long node_addr) -{ - struct tipc_node *n_ptr; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(node_addr); - if (n_ptr) { - tipc_node_lock(n_ptr); - n_ptr->block_setup &= ~WAIT_NAMES_GONE; - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); -} - static void node_lost_contact(struct tipc_node *n_ptr) { char addr_string[16]; @@ -296,10 +303,9 @@ static void node_lost_contact(struct tipc_node *n_ptr) kfree_skb_list(n_ptr->bclink.deferred_head); n_ptr->bclink.deferred_size = 0; - if (n_ptr->bclink.reasm_head) { - kfree_skb(n_ptr->bclink.reasm_head); - n_ptr->bclink.reasm_head = NULL; - n_ptr->bclink.reasm_tail = NULL; + if (n_ptr->bclink.reasm_buf) { + kfree_skb(n_ptr->bclink.reasm_buf); + n_ptr->bclink.reasm_buf = NULL; } tipc_bclink_remove_node(n_ptr->addr); @@ -318,12 +324,13 @@ static void node_lost_contact(struct tipc_node *n_ptr) tipc_link_reset_fragments(l_ptr); } - /* Notify subscribers */ - tipc_nodesub_notify(n_ptr); + n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; - /* Prevent re-contact with node until cleanup is done */ - n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE; - tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr); + /* Notify subscribers and prevent re-contact with node until + * cleanup is done. + */ + n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN | + TIPC_NOTIFY_NODE_DOWN; } struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) @@ -436,3 +443,56 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) rcu_read_unlock(); return buf; } + +/** + * tipc_node_get_linkname - get the name of a link + * + * @bearer_id: id of the bearer + * @node: peer node address + * @linkname: link name output buffer + * + * Returns 0 on success + */ +int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) +{ + struct tipc_link *link; + struct tipc_node *node = tipc_node_find(addr); + + if ((bearer_id >= MAX_BEARERS) || !node) + return -EINVAL; + tipc_node_lock(node); + link = node->links[bearer_id]; + if (link) { + strncpy(linkname, link->name, len); + tipc_node_unlock(node); + return 0; + } + tipc_node_unlock(node); + return -EINVAL; +} + +void tipc_node_unlock(struct tipc_node *node) +{ + LIST_HEAD(nsub_list); + u32 addr = 0; + + if (likely(!node->action_flags)) { + spin_unlock_bh(&node->lock); + return; + } + + if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) { + list_replace_init(&node->nsub, &nsub_list); + node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN; + } + if (node->action_flags & TIPC_NOTIFY_NODE_UP) { + node->action_flags &= ~TIPC_NOTIFY_NODE_UP; + addr = node->addr; + } + spin_unlock_bh(&node->lock); + + if (!list_empty(&nsub_list)) + tipc_nodesub_notify(&nsub_list); + if (addr) + tipc_named_node_up(addr); +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 7cbb8cec1a93..b61716a8218e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -41,68 +41,81 @@ #include "addr.h" #include "net.h" #include "bearer.h" +#include "msg.h" /* * Out-of-range value for node signature */ #define INVALID_NODE_SIG 0x10000 -/* Flags used to block (re)establishment of contact with a neighboring node */ -#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */ -#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */ -#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */ +/* Flags used to take different actions according to flag type + * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down + * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + */ +enum { + TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), + TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4) +}; + +/** + * struct tipc_node_bclink - TIPC node bclink structure + * @acked: sequence # of last outbound b'cast message acknowledged by node + * @last_in: sequence # of last in-sequence b'cast message received from node + * @last_sent: sequence # of last b'cast message sent by node + * @oos_state: state tracker for handling OOS b'cast messages + * @deferred_size: number of OOS b'cast messages in deferred queue + * @deferred_head: oldest OOS b'cast message received from node + * @deferred_tail: newest OOS b'cast message received from node + * @reasm_buf: broadcast reassembly queue head from node + * @recv_permitted: true if node is allowed to receive b'cast messages + */ +struct tipc_node_bclink { + u32 acked; + u32 last_in; + u32 last_sent; + u32 oos_state; + u32 deferred_size; + struct sk_buff *deferred_head; + struct sk_buff *deferred_tail; + struct sk_buff *reasm_buf; + bool recv_permitted; +}; /** * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure * @hash: links to adjacent nodes in unsorted hash chain - * @list: links to adjacent nodes in sorted list of cluster's nodes - * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node * @links: pointers to all links to node + * @action_flags: bit mask of different types of node actions + * @bclink: broadcast-related info + * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) - * @block_setup: bit mask of conditions preventing link establishment to node * @link_cnt: number of links to node * @signature: node instance identifier - * @bclink: broadcast-related info + * @nsub: list of "node down" subscriptions monitoring node * @rcu: rcu struct for tipc_node - * @acked: sequence # of last outbound b'cast message acknowledged by node - * @last_in: sequence # of last in-sequence b'cast message received from node - * @last_sent: sequence # of last b'cast message sent by node - * @oos_state: state tracker for handling OOS b'cast messages - * @deferred_size: number of OOS b'cast messages in deferred queue - * @deferred_head: oldest OOS b'cast message received from node - * @deferred_tail: newest OOS b'cast message received from node - * @reasm_head: broadcast reassembly queue head from node - * @reasm_tail: last broadcast fragment received from node - * @recv_permitted: true if node is allowed to receive b'cast messages */ struct tipc_node { u32 addr; spinlock_t lock; struct hlist_node hash; - struct list_head list; - struct list_head nsub; struct tipc_link *active_links[2]; + u32 act_mtus[2]; struct tipc_link *links[MAX_BEARERS]; + unsigned int action_flags; + struct tipc_node_bclink bclink; + struct list_head list; int link_cnt; int working_links; - int block_setup; u32 signature; + struct list_head nsub; struct rcu_head rcu; - struct { - u32 acked; - u32 last_in; - u32 last_sent; - u32 oos_state; - u32 deferred_size; - struct sk_buff *deferred_head; - struct sk_buff *deferred_tail; - struct sk_buff *reasm_head; - struct sk_buff *reasm_tail; - bool recv_permitted; - } bclink; }; extern struct list_head tipc_node_list; @@ -118,15 +131,33 @@ int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); +int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); +void tipc_node_unlock(struct tipc_node *node); -static inline void tipc_node_lock(struct tipc_node *n_ptr) +static inline void tipc_node_lock(struct tipc_node *node) { - spin_lock_bh(&n_ptr->lock); + spin_lock_bh(&node->lock); } -static inline void tipc_node_unlock(struct tipc_node *n_ptr) +static inline bool tipc_node_blocked(struct tipc_node *node) { - spin_unlock_bh(&n_ptr->lock); + return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN | + TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); +} + +static inline uint tipc_node_get_mtu(u32 addr, u32 selector) +{ + struct tipc_node *node; + u32 mtu; + + node = tipc_node_find(addr); + + if (likely(node)) + mtu = node->act_mtus[selector & 1]; + else + mtu = MAX_MSG_SIZE; + + return mtu; } #endif diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 8a7384c04add..2d13eea8574a 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -81,15 +81,16 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) * * Note: node is locked by caller */ -void tipc_nodesub_notify(struct tipc_node *node) +void tipc_nodesub_notify(struct list_head *nsub_list) { - struct tipc_node_subscr *ns; + struct tipc_node_subscr *ns, *safe; + net_ev_handler handle_node_down; - list_for_each_entry(ns, &node->nsub, nodesub_list) { - if (ns->handle_node_down) { - tipc_k_signal((Handler)ns->handle_node_down, - (unsigned long)ns->usr_handle); + list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) { + handle_node_down = ns->handle_node_down; + if (handle_node_down) { ns->handle_node_down = NULL; + handle_node_down(ns->usr_handle); } } } diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h index c95d20727ded..d91b8cc81e3d 100644 --- a/net/tipc/node_subscr.h +++ b/net/tipc/node_subscr.h @@ -58,6 +58,6 @@ struct tipc_node_subscr { void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down); void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); -void tipc_nodesub_notify(struct tipc_node *node); +void tipc_nodesub_notify(struct list_head *nsub_list); #endif diff --git a/net/tipc/port.c b/net/tipc/port.c index 5c14c7801ee6..7e096a5e7701 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -42,8 +42,6 @@ /* Connection management: */ #define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define CONFIRMED 0 -#define PROBING 1 #define MAX_REJECT_SIZE 1024 @@ -76,124 +74,6 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg) (!peernode && (orignode == tipc_own_addr)); } -/** - * tipc_port_mcast_xmit - send a multicast message to local and remote - * destinations - */ -int tipc_port_mcast_xmit(struct tipc_port *oport, - struct tipc_name_seq const *seq, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *hdr; - struct sk_buff *buf; - struct sk_buff *ibuf = NULL; - struct tipc_port_list dports = {0, NULL, }; - int ext_targets; - int res; - - /* Create multicast message */ - hdr = &oport->phdr; - msg_set_type(hdr, TIPC_MCAST_MSG); - msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); - msg_set_destport(hdr, 0); - msg_set_destnode(hdr, 0); - msg_set_nametype(hdr, seq->type); - msg_set_namelower(hdr, seq->lower); - msg_set_nameupper(hdr, seq->upper); - msg_set_hdr_sz(hdr, MCAST_H_SIZE); - res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (unlikely(!buf)) - return res; - - /* Figure out where to send multicast message */ - ext_targets = tipc_nametbl_mc_translate(seq->type, seq->lower, seq->upper, - TIPC_NODE_SCOPE, &dports); - - /* Send message to destinations (duplicate it only if necessary) */ - if (ext_targets) { - if (dports.count != 0) { - ibuf = skb_copy(buf, GFP_ATOMIC); - if (ibuf == NULL) { - tipc_port_list_free(&dports); - kfree_skb(buf); - return -ENOMEM; - } - } - res = tipc_bclink_xmit(buf); - if ((res < 0) && (dports.count != 0)) - kfree_skb(ibuf); - } else { - ibuf = buf; - } - - if (res >= 0) { - if (ibuf) - tipc_port_mcast_rcv(ibuf, &dports); - } else { - tipc_port_list_free(&dports); - } - return res; -} - -/** - * tipc_port_mcast_rcv - deliver multicast message to all destination ports - * - * If there is no port list, perform a lookup to create one - */ -void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp) -{ - struct tipc_msg *msg; - struct tipc_port_list dports = {0, NULL, }; - struct tipc_port_list *item = dp; - int cnt = 0; - - msg = buf_msg(buf); - - /* Create destination port list, if one wasn't supplied */ - if (dp == NULL) { - tipc_nametbl_mc_translate(msg_nametype(msg), - msg_namelower(msg), - msg_nameupper(msg), - TIPC_CLUSTER_SCOPE, - &dports); - item = dp = &dports; - } - - /* Deliver a copy of message to each destination port */ - if (dp->count != 0) { - msg_set_destnode(msg, tipc_own_addr); - if (dp->count == 1) { - msg_set_destport(msg, dp->ports[0]); - tipc_port_rcv(buf); - tipc_port_list_free(dp); - return; - } - for (; cnt < dp->count; cnt++) { - int index = cnt % PLSIZE; - struct sk_buff *b = skb_clone(buf, GFP_ATOMIC); - - if (b == NULL) { - pr_warn("Unable to deliver multicast message(s)\n"); - goto exit; - } - if ((index == 0) && (cnt != 0)) - item = item->next; - msg_set_destport(buf_msg(b), item->ports[index]); - tipc_port_rcv(b); - } - } -exit: - kfree_skb(buf); - tipc_port_list_free(dp); -} - - -void tipc_port_wakeup(struct tipc_port *port) -{ - tipc_sock_wakeup(tipc_port_to_sock(port)); -} - /* tipc_port_init - intiate TIPC port and lock it * * Returns obtained reference if initialization is successful, zero otherwise @@ -235,6 +115,8 @@ u32 tipc_port_init(struct tipc_port *p_ptr, void tipc_port_destroy(struct tipc_port *p_ptr) { struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; + u32 peer; tipc_withdraw(p_ptr, 0, NULL); @@ -246,14 +128,15 @@ void tipc_port_destroy(struct tipc_port *p_ptr) if (p_ptr->connected) { buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); + msg = buf_msg(buf); + peer = msg_destnode(msg); + tipc_link_xmit(buf, peer, msg_link_selector(msg)); } - spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); list_del(&p_ptr->wait_list); spin_unlock_bh(&tipc_port_list_lock); k_term_timer(&p_ptr->timer); - tipc_net_route_msg(buf); } /* @@ -275,100 +158,16 @@ static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, msg_set_destport(msg, tipc_port_peerport(p_ptr)); msg_set_origport(msg, p_ptr->ref); msg_set_msgcnt(msg, ack); + buf->next = NULL; } return buf; } -int tipc_reject_msg(struct sk_buff *buf, u32 err) -{ - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *rbuf; - struct tipc_msg *rmsg; - int hdr_sz; - u32 imp; - u32 data_sz = msg_data_sz(msg); - u32 src_node; - u32 rmsg_sz; - - /* discard rejected message if it shouldn't be returned to sender */ - if (WARN(!msg_isdata(msg), - "attempt to reject message with user=%u", msg_user(msg))) { - dump_stack(); - goto exit; - } - if (msg_errcode(msg) || msg_dest_droppable(msg)) - goto exit; - - /* - * construct returned message by copying rejected message header and - * data (or subset), then updating header fields that need adjusting - */ - hdr_sz = msg_hdr_sz(msg); - rmsg_sz = hdr_sz + min_t(u32, data_sz, MAX_REJECT_SIZE); - - rbuf = tipc_buf_acquire(rmsg_sz); - if (rbuf == NULL) - goto exit; - - rmsg = buf_msg(rbuf); - skb_copy_to_linear_data(rbuf, msg, rmsg_sz); - - if (msg_connected(rmsg)) { - imp = msg_importance(rmsg); - if (imp < TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(rmsg, ++imp); - } - msg_set_non_seq(rmsg, 0); - msg_set_size(rmsg, rmsg_sz); - msg_set_errcode(rmsg, err); - msg_set_prevnode(rmsg, tipc_own_addr); - msg_swap_words(rmsg, 4, 5); - if (!msg_short(rmsg)) - msg_swap_words(rmsg, 6, 7); - - /* send self-abort message when rejecting on a connected port */ - if (msg_connected(msg)) { - struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); - - if (p_ptr) { - struct sk_buff *abuf = NULL; - - if (p_ptr->connected) - abuf = port_build_self_abort_msg(p_ptr, err); - tipc_port_unlock(p_ptr); - tipc_net_route_msg(abuf); - } - } - - /* send returned message & dispose of rejected message */ - src_node = msg_prevnode(msg); - if (in_own_node(src_node)) - tipc_port_rcv(rbuf); - else - tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg)); -exit: - kfree_skb(buf); - return data_sz; -} - -int tipc_port_iovec_reject(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, unsigned int len, - int err) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (!buf) - return res; - - return tipc_reject_msg(buf, err); -} - static void port_timeout(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; if (!p_ptr) return; @@ -379,15 +178,16 @@ static void port_timeout(unsigned long ref) } /* Last probe answered ? */ - if (p_ptr->probing_state == PROBING) { + if (p_ptr->probing_state == TIPC_CONN_PROBING) { buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT); } else { buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0); - p_ptr->probing_state = PROBING; + p_ptr->probing_state = TIPC_CONN_PROBING; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); } tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -395,12 +195,14 @@ static void port_handle_node_down(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; if (!p_ptr) return; buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE); tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -412,6 +214,7 @@ static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 er struct tipc_msg *msg = buf_msg(buf); msg_swap_words(msg, 4, 5); msg_swap_words(msg, 6, 7); + buf->next = NULL; } return buf; } @@ -436,60 +239,11 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er if (imp < TIPC_CRITICAL_IMPORTANCE) msg_set_importance(msg, ++imp); msg_set_errcode(msg, err); + buf->next = NULL; } return buf; } -void tipc_port_proto_rcv(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct tipc_port *p_ptr; - struct sk_buff *r_buf = NULL; - u32 destport = msg_destport(msg); - int wakeable; - - /* Validate connection */ - p_ptr = tipc_port_lock(destport); - if (!p_ptr || !p_ptr->connected || !tipc_port_peer_msg(p_ptr, msg)) { - r_buf = tipc_buf_acquire(BASIC_H_SIZE); - if (r_buf) { - msg = buf_msg(r_buf); - tipc_msg_init(msg, TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, - BASIC_H_SIZE, msg_orignode(msg)); - msg_set_errcode(msg, TIPC_ERR_NO_PORT); - msg_set_origport(msg, destport); - msg_set_destport(msg, msg_origport(msg)); - } - if (p_ptr) - tipc_port_unlock(p_ptr); - goto exit; - } - - /* Process protocol message sent by peer */ - switch (msg_type(msg)) { - case CONN_ACK: - wakeable = tipc_port_congested(p_ptr) && p_ptr->congested; - p_ptr->acked += msg_msgcnt(msg); - if (!tipc_port_congested(p_ptr)) { - p_ptr->congested = 0; - if (wakeable) - tipc_port_wakeup(p_ptr); - } - break; - case CONN_PROBE: - r_buf = port_build_proto_msg(p_ptr, CONN_PROBE_REPLY, 0); - break; - default: - /* CONN_PROBE_REPLY or unrecognized - no action required */ - break; - } - p_ptr->probing_state = CONFIRMED; - tipc_port_unlock(p_ptr); -exit: - tipc_net_route_msg(r_buf); - kfree_skb(buf); -} - static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id) { struct publication *publ; @@ -581,16 +335,19 @@ void tipc_acknowledge(u32 ref, u32 ack) { struct tipc_port *p_ptr; struct sk_buff *buf = NULL; + struct tipc_msg *msg; p_ptr = tipc_port_lock(ref); if (!p_ptr) return; - if (p_ptr->connected) { - p_ptr->conn_unacked -= ack; + if (p_ptr->connected) buf = port_build_proto_msg(p_ptr, CONN_ACK, ack); - } + tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + if (!buf) + return; + msg = buf_msg(buf); + tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); } int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, @@ -689,7 +446,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, msg_set_hdr_sz(msg, SHORT_H_SIZE); p_ptr->probing_interval = PROBING_INTERVAL; - p_ptr->probing_state = CONFIRMED; + p_ptr->probing_state = TIPC_CONN_OK; p_ptr->connected = 1; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); @@ -698,7 +455,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, (net_ev_handler)port_handle_node_down); res = 0; exit: - p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->max_pkt = tipc_node_get_mtu(peer->node, ref); return res; } @@ -741,6 +498,7 @@ int tipc_port_disconnect(u32 ref) */ int tipc_port_shutdown(u32 ref) { + struct tipc_msg *msg; struct tipc_port *p_ptr; struct sk_buff *buf = NULL; @@ -750,180 +508,7 @@ int tipc_port_shutdown(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN); tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); return tipc_port_disconnect(ref); } - -/** - * tipc_port_rcv - receive message from lower layer and deliver to port user - */ -int tipc_port_rcv(struct sk_buff *buf) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg = buf_msg(buf); - u32 destport = msg_destport(msg); - u32 dsz = msg_data_sz(msg); - u32 err; - - /* forward unresolved named message */ - if (unlikely(!destport)) { - tipc_net_route_msg(buf); - return dsz; - } - - /* validate destination & pass to port, otherwise reject message */ - p_ptr = tipc_port_lock(destport); - if (likely(p_ptr)) { - err = tipc_sk_rcv(&tipc_port_to_sock(p_ptr)->sk, buf); - tipc_port_unlock(p_ptr); - if (likely(!err)) - return dsz; - } else { - err = TIPC_ERR_NO_PORT; - } - - return tipc_reject_msg(buf, err); -} - -/* - * tipc_port_iovec_rcv: Concatenate and deliver sectioned - * message for this node. - */ -static int tipc_port_iovec_rcv(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (likely(buf)) - tipc_port_rcv(buf); - return res; -} - -/** - * tipc_send - send message sections on connection - */ -int tipc_send(struct tipc_port *p_ptr, - struct iovec const *msg_sect, - unsigned int len) -{ - u32 destnode; - int res; - - if (!p_ptr->connected) - return -EINVAL; - - p_ptr->congested = 1; - if (!tipc_port_congested(p_ptr)) { - destnode = tipc_port_peernode(p_ptr); - if (likely(!in_own_node(destnode))) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - destnode); - else - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - - if (likely(res != -ELINKCONG)) { - p_ptr->congested = 0; - if (res > 0) - p_ptr->sent++; - return res; - } - } - if (tipc_port_unreliable(p_ptr)) { - p_ptr->congested = 0; - return len; - } - return -ELINKCONG; -} - -/** - * tipc_send2name - send message sections to port name - */ -int tipc_send2name(struct tipc_port *p_ptr, - struct tipc_name const *name, - unsigned int domain, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *msg; - u32 destnode = domain; - u32 destport; - int res; - - if (p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_NAMED_MSG); - msg_set_hdr_sz(msg, NAMED_H_SIZE); - msg_set_nametype(msg, name->type); - msg_set_nameinst(msg, name->instance); - msg_set_lookup_scope(msg, tipc_addr_scope(domain)); - destport = tipc_nametbl_translate(name->type, name->instance, &destnode); - msg_set_destnode(msg, destnode); - msg_set_destport(msg, destport); - - if (likely(destport || destnode)) { - if (likely(in_own_node(destnode))) - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - else if (tipc_own_addr) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - destnode); - else - res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, - len, TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (tipc_port_unreliable(p_ptr)) - return len; - - return -ELINKCONG; - } - return tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NAME); -} - -/** - * tipc_send2port - send message sections to port identity - */ -int tipc_send2port(struct tipc_port *p_ptr, - struct tipc_portid const *dest, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *msg; - int res; - - if (p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_lookup_scope(msg, 0); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - else if (tipc_own_addr) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - dest->node); - else - res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (tipc_port_unreliable(p_ptr)) - return len; - - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index a00397393bd1..3f93454592b6 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -42,9 +42,10 @@ #include "msg.h" #include "node_subscr.h" -#define TIPC_FLOW_CONTROL_WIN 512 -#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) +#define TIPC_CONNACK_INTV 256 +#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) +#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) /** * struct tipc_port - TIPC port structure @@ -52,17 +53,13 @@ * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established * @conn_instance: TIPC instance used when connection was established - * @conn_unacked: number of unacknowledged messages received from peer port * @published: non-zero if port has one or more associated names - * @congested: non-zero if cannot send because of link or port congestion * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: - * @sent: # of non-empty messages sent by port - * @acked: # of non-empty message acknowledgements from connected port's peer * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: @@ -75,17 +72,13 @@ struct tipc_port { int connected; u32 conn_type; u32 conn_instance; - u32 conn_unacked; int published; - u32 congested; u32 max_pkt; u32 ref; struct tipc_msg phdr; struct list_head port_list; struct list_head wait_list; u32 waiting_pkts; - u32 sent; - u32 acked; struct list_head publications; u32 pub_count; u32 probing_state; @@ -103,8 +96,6 @@ struct tipc_port_list; u32 tipc_port_init(struct tipc_port *p_ptr, const unsigned int importance); -int tipc_reject_msg(struct sk_buff *buf, u32 err); - void tipc_acknowledge(u32 port_ref, u32 ack); void tipc_port_destroy(struct tipc_port *p_ptr); @@ -121,8 +112,6 @@ int tipc_port_disconnect(u32 portref); int tipc_port_shutdown(u32 ref); -void tipc_port_wakeup(struct tipc_port *port); - /* * The following routines require that the port be locked on entry */ @@ -131,40 +120,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, struct tipc_portid const *peer); int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); -/* - * TIPC messaging routines - */ -int tipc_port_rcv(struct sk_buff *buf); - -int tipc_send(struct tipc_port *port, - struct iovec const *msg_sect, - unsigned int len); - -int tipc_send2name(struct tipc_port *port, - struct tipc_name const *name, - u32 domain, - struct iovec const *msg_sect, - unsigned int len); - -int tipc_send2port(struct tipc_port *port, - struct tipc_portid const *dest, - struct iovec const *msg_sect, - unsigned int len); - -int tipc_port_mcast_xmit(struct tipc_port *port, - struct tipc_name_seq const *seq, - struct iovec const *msg, - unsigned int len); - -int tipc_port_iovec_reject(struct tipc_port *p_ptr, - struct tipc_msg *hdr, - struct iovec const *msg_sect, - unsigned int len, - int err); - struct sk_buff *tipc_port_get_ports(void); -void tipc_port_proto_rcv(struct sk_buff *buf); -void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp); void tipc_port_reinit(void); /** @@ -185,12 +141,6 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr) spin_unlock_bh(p_ptr->lock); } -static inline int tipc_port_congested(struct tipc_port *p_ptr) -{ - return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); -} - - static inline u32 tipc_port_peernode(struct tipc_port *p_ptr) { return msg_destnode(&p_ptr->phdr); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3c0256962f7d..7d423ee10897 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -36,19 +36,23 @@ #include "core.h" #include "port.h" - +#include "name_table.h" +#include "node.h" +#include "link.h" #include <linux/export.h> #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define TIPC_FWD_MSG 1 -static int backlog_rcv(struct sock *sk, struct sk_buff *skb); +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); +static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -122,9 +126,12 @@ static void advance_rx_queue(struct sock *sk) static void reject_rx_queue(struct sock *sk) { struct sk_buff *buf; + u32 dnode; - while ((buf = __skb_dequeue(&sk->sk_receive_queue))) - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + while ((buf = __skb_dequeue(&sk->sk_receive_queue))) { + if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit(buf, dnode, 0); + } } /** @@ -195,11 +202,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock->state = state; sock_init_data(sock, sk); - sk->sk_backlog_rcv = backlog_rcv; + sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; - tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->sent_unacked = 0; + atomic_set(&tsk->dupl_rcvcnt, 0); tipc_port_unlock(port); if (sock->state == SS_READY) { @@ -301,6 +310,7 @@ static int tipc_release(struct socket *sock) struct tipc_sock *tsk; struct tipc_port *port; struct sk_buff *buf; + u32 dnode; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -329,7 +339,8 @@ static int tipc_release(struct socket *sock) sock->state = SS_DISCONNECTING; tipc_port_disconnect(port->ref); } - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit(buf, dnode, 0); } } @@ -502,12 +513,12 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch ((int)sock->state) { case SS_UNCONNECTED: - if (!tsk->port.congested) + if (!tsk->link_cong) mask |= POLLOUT; break; case SS_READY: case SS_CONNECTED: - if (!tsk->port.congested) + if (!tsk->link_cong && !tipc_sk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: @@ -524,6 +535,136 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, } /** + * tipc_sendmcast - send multicast message + * @sock: socket structure + * @seq: destination address + * @iov: message data to send + * @dsz: total length of message data + * @timeo: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, + struct iovec *iov, size_t dsz, long timeo) +{ + struct sock *sk = sock->sk; + struct tipc_msg *mhdr = &tipc_sk(sk)->port.phdr; + struct sk_buff *buf; + uint mtu; + int rc; + + msg_set_type(mhdr, TIPC_MCAST_MSG); + msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); + msg_set_destport(mhdr, 0); + msg_set_destnode(mhdr, 0); + msg_set_nametype(mhdr, seq->type); + msg_set_namelower(mhdr, seq->lower); + msg_set_nameupper(mhdr, seq->upper); + msg_set_hdr_sz(mhdr, MCAST_H_SIZE); + +new_mtu: + mtu = tipc_bclink_get_mtu(); + rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf); + if (unlikely(rc < 0)) + return rc; + + do { + rc = tipc_bclink_xmit(buf); + if (likely(rc >= 0)) { + rc = dsz; + break; + } + if (rc == -EMSGSIZE) + goto new_mtu; + if (rc != -ELINKCONG) + break; + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (rc) + kfree_skb_list(buf); + } while (!rc); + return rc; +} + +/* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets + */ +void tipc_sk_mcast_rcv(struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + struct tipc_port_list dports = {0, NULL, }; + struct tipc_port_list *item; + struct sk_buff *b; + uint i, last, dst = 0; + u32 scope = TIPC_CLUSTER_SCOPE; + + if (in_own_node(msg_orignode(msg))) + scope = TIPC_NODE_SCOPE; + + /* Create destination port list: */ + tipc_nametbl_mc_translate(msg_nametype(msg), + msg_namelower(msg), + msg_nameupper(msg), + scope, + &dports); + last = dports.count; + if (!last) { + kfree_skb(buf); + return; + } + + for (item = &dports; item; item = item->next) { + for (i = 0; i < PLSIZE && ++dst <= last; i++) { + b = (dst != last) ? skb_clone(buf, GFP_ATOMIC) : buf; + if (!b) { + pr_warn("Failed do clone mcast rcv buffer\n"); + continue; + } + msg_set_destport(msg, item->ports[i]); + tipc_sk_rcv(b); + } + } + tipc_port_list_free(&dports); +} + +/** + * tipc_sk_proto_rcv - receive a connection mng protocol message + * @tsk: receiving socket + * @dnode: node to send response message to, if any + * @buf: buffer containing protocol message + * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if + * (CONN_PROBE_REPLY) message should be forwarded. + */ +static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, + struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + struct tipc_port *port = &tsk->port; + int conn_cong; + + /* Ignore if connection cannot be validated: */ + if (!port->connected || !tipc_port_peer_msg(port, msg)) + goto exit; + + port->probing_state = TIPC_CONN_OK; + + if (msg_type(msg) == CONN_ACK) { + conn_cong = tipc_sk_conn_cong(tsk); + tsk->sent_unacked -= msg_msgcnt(msg); + if (conn_cong) + tipc_sock_wakeup(tsk); + } else if (msg_type(msg) == CONN_PROBE) { + if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) + return TIPC_OK; + msg_set_type(msg, CONN_PROBE_REPLY); + return TIPC_FWD_MSG; + } + /* Do nothing if msg_type() == CONN_PROBE_REPLY */ +exit: + kfree_skb(buf); + return TIPC_OK; +} + +/** * dest_name_check - verify user is permitted to send to specified port name * @dest: destination address * @m: descriptor for message to be sent @@ -537,6 +678,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) { struct tipc_cfg_msg_hdr hdr; + if (unlikely(dest->addrtype == TIPC_ADDR_ID)) + return 0; if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES)) return 0; if (likely(dest->addr.name.name.type == TIPC_TOP_SRV)) @@ -573,19 +716,18 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, !tsk->port.congested); + done = sk_wait_event(sk, timeo_p, !tsk->link_cong); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; } - /** * tipc_sendmsg - send message in connectionless manner * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send - * @total_len: length of message + * @dsz: amount of user data to be sent * * Message must have an destination specified explicitly. * Used for SOCK_RDM and SOCK_DGRAM messages, @@ -595,100 +737,123 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * Returns the number of bytes sent on success, or errno otherwise */ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) + struct msghdr *m, size_t dsz) { + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_port *port = &tsk->port; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - int needs_conn; + struct tipc_msg *mhdr = &port->phdr; + struct iovec *iov = m->msg_iov; + u32 dnode, dport; + struct sk_buff *buf; + struct tipc_name_seq *seq = &dest->addr.nameseq; + u32 mtu; long timeo; - int res = -EINVAL; + int rc = -EINVAL; if (unlikely(!dest)) return -EDESTADDRREQ; + if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; - if (total_len > TIPC_MAX_USER_MSG_SIZE) + + if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) lock_sock(sk); - needs_conn = (sock->state != SS_READY); - if (unlikely(needs_conn)) { + if (unlikely(sock->state != SS_READY)) { if (sock->state == SS_LISTENING) { - res = -EPIPE; + rc = -EPIPE; goto exit; } if (sock->state != SS_UNCONNECTED) { - res = -EISCONN; + rc = -EISCONN; goto exit; } if (tsk->port.published) { - res = -EOPNOTSUPP; + rc = -EOPNOTSUPP; goto exit; } if (dest->addrtype == TIPC_ADDR_NAME) { tsk->port.conn_type = dest->addr.name.name.type; tsk->port.conn_instance = dest->addr.name.name.instance; } - - /* Abort any pending connection attempts (very unlikely) */ - reject_rx_queue(sk); } + rc = dest_name_check(dest, m); + if (rc) + goto exit; timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); - do { - if (dest->addrtype == TIPC_ADDR_NAME) { - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_send2name(port, - &dest->addr.name.name, - dest->addr.name.domain, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_ID) { - res = tipc_send2port(port, - &dest->addr.id, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_MCAST) { - if (needs_conn) { - res = -EOPNOTSUPP; - break; - } - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_port_mcast_xmit(port, - &dest->addr.nameseq, - m->msg_iov, - total_len); + + if (dest->addrtype == TIPC_ADDR_MCAST) { + rc = tipc_sendmcast(sock, seq, iov, dsz, timeo); + goto exit; + } else if (dest->addrtype == TIPC_ADDR_NAME) { + u32 type = dest->addr.name.name.type; + u32 inst = dest->addr.name.name.instance; + u32 domain = dest->addr.name.domain; + + dnode = domain; + msg_set_type(mhdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(mhdr, NAMED_H_SIZE); + msg_set_nametype(mhdr, type); + msg_set_nameinst(mhdr, inst); + msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); + dport = tipc_nametbl_translate(type, inst, &dnode); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dport); + if (unlikely(!dport && !dnode)) { + rc = -EHOSTUNREACH; + goto exit; } - if (likely(res != -ELINKCONG)) { - if (needs_conn && (res >= 0)) + } else if (dest->addrtype == TIPC_ADDR_ID) { + dnode = dest->addr.id.node; + msg_set_type(mhdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(mhdr, 0); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dest->addr.id.ref); + msg_set_hdr_sz(mhdr, BASIC_H_SIZE); + } + +new_mtu: + mtu = tipc_node_get_mtu(dnode, tsk->port.ref); + rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf); + if (rc < 0) + goto exit; + + do { + rc = tipc_link_xmit(buf, dnode, tsk->port.ref); + if (likely(rc >= 0)) { + if (sock->state != SS_READY) sock->state = SS_CONNECTING; + rc = dsz; break; } - res = tipc_wait_for_sndmsg(sock, &timeo); - if (res) + if (rc == -EMSGSIZE) + goto new_mtu; + + if (rc != -ELINKCONG) break; - } while (1); + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (rc) + kfree_skb_list(buf); + } while (!rc); exit: if (iocb) release_sock(sk); - return res; + + return rc; } static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; DEFINE_WAIT(wait); int done; @@ -707,37 +872,49 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, timeo_p, - (!port->congested || !port->connected)); + (!tsk->link_cong && + !tipc_sk_conn_cong(tsk)) || + !tsk->port.connected); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; } /** - * tipc_send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held + * tipc_send_stream - send stream-oriented data + * @iocb: (unused) * @sock: socket structure - * @m: message to send - * @total_len: length of message + * @m: data to send + * @dsz: total length of data to be transmitted * - * Used for SOCK_SEQPACKET messages and SOCK_STREAM data. + * Used for SOCK_STREAM data. * - * Returns the number of bytes sent on success, or errno otherwise + * Returns the number of bytes sent on success (or partial success), + * or errno if no data sent */ -static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; + struct tipc_msg *mhdr = &port->phdr; + struct sk_buff *buf; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - int res = -EINVAL; + u32 ref = port->ref; + int rc = -EINVAL; long timeo; + u32 dnode; + uint mtu, send, sent = 0; /* Handle implied connection establishment */ - if (unlikely(dest)) - return tipc_sendmsg(iocb, sock, m, total_len); - - if (total_len > TIPC_MAX_USER_MSG_SIZE) + if (unlikely(dest)) { + rc = tipc_sendmsg(iocb, sock, m, dsz); + if (dsz && (dsz == rc)) + tsk->sent_unacked = 1; + return rc; + } + if (dsz > (uint)INT_MAX) return -EMSGSIZE; if (iocb) @@ -745,123 +922,66 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) - res = -EPIPE; + rc = -EPIPE; else - res = -ENOTCONN; + rc = -ENOTCONN; goto exit; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + dnode = tipc_port_peernode(port); + +next: + mtu = port->max_pkt; + send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); + rc = tipc_msg_build(mhdr, m->msg_iov, sent, send, mtu, &buf); + if (unlikely(rc < 0)) + goto exit; do { - res = tipc_send(&tsk->port, m->msg_iov, total_len); - if (likely(res != -ELINKCONG)) - break; - res = tipc_wait_for_sndpkt(sock, &timeo); - if (res) - break; - } while (1); + if (likely(!tipc_sk_conn_cong(tsk))) { + rc = tipc_link_xmit(buf, dnode, ref); + if (likely(!rc)) { + tsk->sent_unacked++; + sent += send; + if (sent == dsz) + break; + goto next; + } + if (rc == -EMSGSIZE) { + port->max_pkt = tipc_node_get_mtu(dnode, ref); + goto next; + } + if (rc != -ELINKCONG) + break; + } + rc = tipc_wait_for_sndpkt(sock, &timeo); + if (rc) + kfree_skb_list(buf); + } while (!rc); exit: if (iocb) release_sock(sk); - return res; + return sent ? sent : rc; } /** - * tipc_send_stream - send stream-oriented data - * @iocb: (unused) + * tipc_send_packet - send a connection-oriented message + * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure - * @m: data to send - * @total_len: total length of data to be sent + * @m: message to send + * @dsz: length of data to be transmitted * - * Used for SOCK_STREAM data. + * Used for SOCK_SEQPACKET messages. * - * Returns the number of bytes sent on success (or partial success), - * or errno if no data sent + * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t dsz) { - struct sock *sk = sock->sk; - struct tipc_sock *tsk = tipc_sk(sk); - struct msghdr my_msg; - struct iovec my_iov; - struct iovec *curr_iov; - int curr_iovlen; - char __user *curr_start; - u32 hdr_size; - int curr_left; - int bytes_to_send; - int bytes_sent; - int res; - - lock_sock(sk); - - /* Handle special cases where there is no connection */ - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_UNCONNECTED) - res = tipc_send_packet(NULL, sock, m, total_len); - else - res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN; - goto exit; - } - - if (unlikely(m->msg_name)) { - res = -EISCONN; - goto exit; - } - - if (total_len > (unsigned int)INT_MAX) { - res = -EMSGSIZE; - goto exit; - } - - /* - * Send each iovec entry using one or more messages - * - * Note: This algorithm is good for the most likely case - * (i.e. one large iovec entry), but could be improved to pass sets - * of small iovec entries into send_packet(). - */ - curr_iov = m->msg_iov; - curr_iovlen = m->msg_iovlen; - my_msg.msg_iov = &my_iov; - my_msg.msg_iovlen = 1; - my_msg.msg_flags = m->msg_flags; - my_msg.msg_name = NULL; - bytes_sent = 0; - - hdr_size = msg_hdr_sz(&tsk->port.phdr); - - while (curr_iovlen--) { - curr_start = curr_iov->iov_base; - curr_left = curr_iov->iov_len; - - while (curr_left) { - bytes_to_send = tsk->port.max_pkt - hdr_size; - if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) - bytes_to_send = TIPC_MAX_USER_MSG_SIZE; - if (curr_left < bytes_to_send) - bytes_to_send = curr_left; - my_iov.iov_base = curr_start; - my_iov.iov_len = bytes_to_send; - res = tipc_send_packet(NULL, sock, &my_msg, - bytes_to_send); - if (res < 0) { - if (bytes_sent) - res = bytes_sent; - goto exit; - } - curr_left -= bytes_to_send; - curr_start += bytes_to_send; - bytes_sent += bytes_to_send; - } + if (dsz > TIPC_MAX_USER_MSG_SIZE) + return -EMSGSIZE; - curr_iov++; - } - res = bytes_sent; -exit: - release_sock(sk); - return res; + return tipc_send_stream(iocb, sock, m, dsz); } /** @@ -983,10 +1103,11 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } -static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) +static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) { struct sock *sk = sock->sk; DEFINE_WAIT(wait); + long timeo = *timeop; int err; for (;;) { @@ -1011,6 +1132,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) break; } finish_wait(sk_sleep(sk), &wait); + *timeop = timeo; return err; } @@ -1054,7 +1176,7 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, restart: /* Look for a message in receive queue; wait if necessary */ - res = tipc_wait_for_rcvmsg(sock, timeo); + res = tipc_wait_for_rcvmsg(sock, &timeo); if (res) goto exit; @@ -1100,8 +1222,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && - (++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) - tipc_acknowledge(port->ref, port->conn_unacked); + (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_acknowledge(port->ref, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } advance_rx_queue(sk); } exit: @@ -1152,7 +1276,7 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, restart: /* Look for a message in receive queue; wait if necessary */ - res = tipc_wait_for_rcvmsg(sock, timeo); + res = tipc_wait_for_rcvmsg(sock, &timeo); if (res) goto exit; @@ -1209,8 +1333,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++port->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) - tipc_acknowledge(port->ref, port->conn_unacked); + if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_acknowledge(port->ref, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } advance_rx_queue(sk); } @@ -1265,17 +1391,16 @@ static void tipc_data_ready(struct sock *sk) * @tsk: TIPC socket * @msg: message * - * Returns TIPC error status code and socket error status code - * once it encounters some errors + * Returns 0 (TIPC_OK) if everyting ok, -TIPC_ERR_NO_PORT otherwise */ -static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { struct sock *sk = &tsk->sk; struct tipc_port *port = &tsk->port; struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); - u32 retval = TIPC_ERR_NO_PORT; + int retval = -TIPC_ERR_NO_PORT; int res; if (msg_mcast(msg)) @@ -1378,32 +1503,37 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) * * Called with socket lock already taken; port lock may also be taken. * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message + * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded */ -static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) +static int filter_rcv(struct sock *sk, struct sk_buff *buf) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *msg = buf_msg(buf); unsigned int limit = rcvbuf_limit(sk, buf); - u32 res = TIPC_OK; + u32 onode; + int rc = TIPC_OK; + + if (unlikely(msg_user(msg) == CONN_MANAGER)) + return tipc_sk_proto_rcv(tsk, &onode, buf); /* Reject message if it is wrong sort of message for socket */ if (msg_type(msg) > TIPC_DIRECT_MSG) - return TIPC_ERR_NO_PORT; + return -TIPC_ERR_NO_PORT; if (sock->state == SS_READY) { if (msg_connected(msg)) - return TIPC_ERR_NO_PORT; + return -TIPC_ERR_NO_PORT; } else { - res = filter_connect(tsk, &buf); - if (res != TIPC_OK || buf == NULL) - return res; + rc = filter_connect(tsk, &buf); + if (rc != TIPC_OK || buf == NULL) + return rc; } /* Reject message if there isn't room to queue it */ if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) - return TIPC_ERR_OVERLOAD; + return -TIPC_ERR_OVERLOAD; /* Enqueue message */ TIPC_SKB_CB(buf)->handle = NULL; @@ -1415,7 +1545,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) } /** - * backlog_rcv - handle incoming message from backlog queue + * tipc_backlog_rcv - handle incoming message from backlog queue * @sk: socket * @buf: message * @@ -1423,47 +1553,78 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) * * Returns 0 */ -static int backlog_rcv(struct sock *sk, struct sk_buff *buf) +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) { - u32 res; + int rc; + u32 onode; + struct tipc_sock *tsk = tipc_sk(sk); + uint truesize = buf->truesize; + + rc = filter_rcv(sk, buf); + + if (likely(!rc)) { + if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, &tsk->dupl_rcvcnt); + return 0; + } + + if ((rc < 0) && !tipc_msg_reverse(buf, &onode, -rc)) + return 0; + + tipc_link_xmit(buf, onode, 0); - res = filter_rcv(sk, buf); - if (res) - tipc_reject_msg(buf, res); return 0; } /** * tipc_sk_rcv - handle incoming message - * @sk: socket receiving message - * @buf: message - * - * Called with port lock already taken. - * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * @buf: buffer containing arriving message + * Consumes buffer + * Returns 0 if success, or errno: -EHOSTUNREACH */ -u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf) +int tipc_sk_rcv(struct sk_buff *buf) { - u32 res; + struct tipc_sock *tsk; + struct tipc_port *port; + struct sock *sk; + u32 dport = msg_destport(buf_msg(buf)); + int rc = TIPC_OK; + uint limit; + u32 dnode; + + /* Validate destination and message */ + port = tipc_port_lock(dport); + if (unlikely(!port)) { + rc = tipc_msg_eval(buf, &dnode); + goto exit; + } - /* - * Process message if socket is unlocked; otherwise add to backlog queue - * - * This code is based on sk_receive_skb(), but must be distinct from it - * since a TIPC-specific filter/reject mechanism is utilized - */ + tsk = tipc_port_to_sock(port); + sk = &tsk->sk; + + /* Queue message */ bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { - res = filter_rcv(sk, buf); + rc = filter_rcv(sk, buf); } else { - if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf))) - res = TIPC_ERR_OVERLOAD; - else - res = TIPC_OK; + if (sk->sk_backlog.len == 0) + atomic_set(&tsk->dupl_rcvcnt, 0); + limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt); + if (sk_add_backlog(sk, buf, limit)) + rc = -TIPC_ERR_OVERLOAD; } bh_unlock_sock(sk); + tipc_port_unlock(port); - return res; + if (likely(!rc)) + return 0; +exit: + if ((rc < 0) && !tipc_msg_reverse(buf, &dnode, -rc)) + return -EHOSTUNREACH; + + tipc_link_xmit(buf, dnode, 0); + return (rc < 0) ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) @@ -1727,6 +1888,7 @@ static int tipc_shutdown(struct socket *sock, int how) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_port *port = &tsk->port; struct sk_buff *buf; + u32 peer; int res; if (how != SHUT_RDWR) @@ -1747,7 +1909,8 @@ restart: goto restart; } tipc_port_disconnect(port->ref); - tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN); + if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN)) + tipc_link_xmit(buf, peer, 0); } else { tipc_port_shutdown(port->ref); } @@ -1905,6 +2068,27 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } +static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +{ + struct tipc_sioc_ln_req lnr; + void __user *argp = (void __user *)arg; + + switch (cmd) { + case SIOCGETLINKNAME: + if (copy_from_user(&lnr, argp, sizeof(lnr))) + return -EFAULT; + if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer, + lnr.linkname, TIPC_MAX_LINK_NAME)) { + if (copy_to_user(argp, &lnr, sizeof(lnr))) + return -EFAULT; + return 0; + } + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -1917,7 +2101,7 @@ static const struct proto_ops msg_ops = { .accept = sock_no_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = sock_no_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, @@ -1938,7 +2122,7 @@ static const struct proto_ops packet_ops = { .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, @@ -1959,7 +2143,7 @@ static const struct proto_ops stream_ops = { .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, - .ioctl = sock_no_ioctl, + .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, .setsockopt = tipc_setsockopt, diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 74e5c7f195a6..43b75b3ceced 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -38,18 +38,29 @@ #include "port.h" #include <net/sock.h> +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API * @port: port - interacts with 'sk' and with the rest of the TIPC stack * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request + * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @link_cong: non-zero if owner must sleep because of link congestion + * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @rcv_unacked: # messages read by user, but not yet acked back to peer */ struct tipc_sock { struct sock sk; struct tipc_port port; unsigned int conn_timeout; + atomic_t dupl_rcvcnt; + int link_cong; + uint sent_unacked; + uint rcv_unacked; }; static inline struct tipc_sock *tipc_sk(const struct sock *sk) @@ -67,6 +78,13 @@ static inline void tipc_sock_wakeup(struct tipc_sock *tsk) tsk->sk.sk_write_space(&tsk->sk); } -u32 tipc_sk_rcv(struct sock *sk, struct sk_buff *buf); +static inline int tipc_sk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; +} + +int tipc_sk_rcv(struct sk_buff *buf); + +void tipc_sk_mcast_rcv(struct sk_buff *buf); #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index bb7e8ba821f4..e96884380732 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1207,7 +1207,7 @@ restart: sk->sk_state = TCP_ESTABLISHED; sock_hold(newsk); - smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */ + smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ unix_peer(sk) = newsk; unix_state_unlock(sk); @@ -1492,10 +1492,14 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; - if (len > SKB_MAX_ALLOC) + if (len > SKB_MAX_ALLOC) { data_len = min_t(size_t, len - SKB_MAX_ALLOC, MAX_SKB_FRAGS * PAGE_SIZE); + data_len = PAGE_ALIGN(data_len); + + BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); + } skb = sock_alloc_send_pskb(sk, len - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, @@ -1670,6 +1674,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); + data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); + skb = sock_alloc_send_pskb(sk, size - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, get_order(UNIX_SKB_FRAGS_SZ)); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5adfd94c5b85..85d232bed87d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1925,9 +1925,23 @@ static struct miscdevice vsock_device = { .fops = &vsock_device_ops, }; -static int __vsock_core_init(void) +int __vsock_core_init(const struct vsock_transport *t, struct module *owner) { - int err; + int err = mutex_lock_interruptible(&vsock_register_mutex); + + if (err) + return err; + + if (transport) { + err = -EBUSY; + goto err_busy; + } + + /* Transport must be the owner of the protocol so that it can't + * unload while there are open sockets. + */ + vsock_proto.owner = owner; + transport = t; vsock_init_tables(); @@ -1951,36 +1965,19 @@ static int __vsock_core_init(void) goto err_unregister_proto; } + mutex_unlock(&vsock_register_mutex); return 0; err_unregister_proto: proto_unregister(&vsock_proto); err_misc_deregister: misc_deregister(&vsock_device); - return err; -} - -int vsock_core_init(const struct vsock_transport *t) -{ - int retval = mutex_lock_interruptible(&vsock_register_mutex); - if (retval) - return retval; - - if (transport) { - retval = -EBUSY; - goto out; - } - - transport = t; - retval = __vsock_core_init(); - if (retval) - transport = NULL; - -out: + transport = NULL; +err_busy: mutex_unlock(&vsock_register_mutex); - return retval; + return err; } -EXPORT_SYMBOL_GPL(vsock_core_init); +EXPORT_SYMBOL_GPL(__vsock_core_init); void vsock_core_exit(void) { @@ -2000,5 +1997,5 @@ EXPORT_SYMBOL_GPL(vsock_core_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.0.0-k"); +MODULE_VERSION("1.0.1.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 16d08b399210..29c8675f9a11 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -95,6 +95,43 @@ config CFG80211_CERTIFICATION_ONUS you are a wireless researcher and are working in a controlled and approved environment by your local regulatory agency. +config CFG80211_REG_CELLULAR_HINTS + bool "cfg80211 regulatory support for cellular base station hints" + depends on CFG80211_CERTIFICATION_ONUS + ---help--- + This option enables support for parsing regulatory hints + from cellular base stations. If enabled and at least one driver + claims support for parsing cellular base station hints the + regulatory core will allow and parse these regulatory hints. + The regulatory core will only apply these regulatory hints on + drivers that support this feature. You should only enable this + feature if you have tested and validated this feature on your + systems. + +config CFG80211_REG_RELAX_NO_IR + bool "cfg80211 support for NO_IR relaxation" + depends on CFG80211_CERTIFICATION_ONUS + ---help--- + This option enables support for relaxation of the NO_IR flag for + situations that certain regulatory bodies have provided clarifications + on how relaxation can occur. This feature has an inherent dependency on + userspace features which must have been properly tested and as such is + not enabled by default. + + A relaxation feature example is allowing the operation of a P2P group + owner (GO) on channels marked with NO_IR if there is an additional BSS + interface which associated to an AP which userspace assumes or confirms + to be an authorized master, i.e., with radar detection support and DFS + capabilities. However, note that in order to not create daisy chain + scenarios, this relaxation is not allowed in cases that the BSS client + is associated to P2P GO and in addition the P2P GO instantiated on + a channel due to this relaxation should not allow connection from + non P2P clients. + + The regulatory core will apply these relaxations only for drivers that + support this feature by declaring the appropriate channel flags and + capabilities in their registration flow. + config CFG80211_DEFAULT_PS bool "enable powersave by default" depends on CFG80211 @@ -125,6 +162,12 @@ config CFG80211_INTERNAL_REGDB and includes code to query that database. This is an alternative to using CRDA for defining regulatory rules for the kernel. + Using this option requires some parsing of the db.txt at build time, + the parser will be upkept with the latest wireless-regdb updates but + older wireless-regdb formats will be ignored. The parser may later + be replaced to avoid issues with conflicts on versions of + wireless-regdb. + For details see: http://wireless.kernel.org/en/developers/Regulatory diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 3e02ade508d8..bdad1f951561 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -6,8 +6,8 @@ #include "rdev-ops.h" -static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify) +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 9c9501a35fb5..992b34070bcb 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -326,28 +326,57 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, int cfg80211_chandef_dfs_required(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef) + const struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) { int width; - int r; + int ret; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; - width = cfg80211_chandef_get_width(chandef); - if (width < 0) - return -EINVAL; + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; - r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, - width); - if (r) - return r; + ret = cfg80211_get_chans_dfs_required(wiphy, + chandef->center_freq1, + width); + if (ret < 0) + return ret; + else if (ret > 0) + return BIT(chandef->width); - if (!chandef->center_freq2) - return 0; + if (!chandef->center_freq2) + return 0; + + ret = cfg80211_get_chans_dfs_required(wiphy, + chandef->center_freq2, + width); + if (ret < 0) + return ret; + else if (ret > 0) + return BIT(chandef->width); - return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, - width); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_P2P_DEVICE: + break; + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + WARN_ON(1); + } + + return 0; } EXPORT_SYMBOL(cfg80211_chandef_dfs_required); @@ -587,12 +616,14 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, width = 5; break; case NL80211_CHAN_WIDTH_10: + prohibited_flags |= IEEE80211_CHAN_NO_10MHZ; width = 10; break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported) return false; case NL80211_CHAN_WIDTH_20_NOHT: + prohibited_flags |= IEEE80211_CHAN_NO_20MHZ; width = 20; break; case NL80211_CHAN_WIDTH_40: @@ -661,17 +692,111 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_chandef_usable); +/* + * For GO only, check if the channel can be used under permissive conditions + * mandated by the some regulatory bodies, i.e., the channel is marked with + * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface + * associated to an AP on the same channel or on the same UNII band + * (assuming that the AP is an authorized master). + * In addition allow the GO to operate on a channel on which indoor operation is + * allowed, iff we are currently operating in an indoor environment. + */ +static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, + struct ieee80211_channel *chan) +{ + struct wireless_dev *wdev_iter; + struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); + + ASSERT_RTNL(); + + if (!config_enabled(CONFIG_CFG80211_REG_RELAX_NO_IR) || + !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) + return false; + + if (regulatory_indoor_allowed() && + (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) + return true; + + if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) + return false; + + /* + * Generally, it is possible to rely on another device/driver to allow + * the GO concurrent relaxation, however, since the device can further + * enforce the relaxation (by doing a similar verifications as this), + * and thus fail the GO instantiation, consider only the interfaces of + * the current registered device. + */ + list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { + struct ieee80211_channel *other_chan = NULL; + int r1, r2; + + if (wdev_iter->iftype != NL80211_IFTYPE_STATION || + !netif_running(wdev_iter->netdev)) + continue; + + wdev_lock(wdev_iter); + if (wdev_iter->current_bss) + other_chan = wdev_iter->current_bss->pub.channel; + wdev_unlock(wdev_iter); + + if (!other_chan) + continue; + + if (chan == other_chan) + return true; + + if (chan->band != IEEE80211_BAND_5GHZ) + continue; + + r1 = cfg80211_get_unii(chan->center_freq); + r2 = cfg80211_get_unii(other_chan->center_freq); + + if (r1 != -EINVAL && r1 == r2) { + /* + * At some locations channels 149-165 are considered a + * bundle, but at other locations, e.g., Indonesia, + * channels 149-161 are considered a bundle while + * channel 165 is left out and considered to be in a + * different bundle. Thus, in case that there is a + * station interface connected to an AP on channel 165, + * it is assumed that channels 149-161 are allowed for + * GO operations. However, having a station interface + * connected to an AP on channels 149-161, does not + * allow GO operation on channel 165. + */ + if (chan->center_freq == 5825 && + other_chan->center_freq != 5825) + continue; + return true; + } + } + + return false; +} + bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef) + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR; - trace_cfg80211_reg_can_beacon(wiphy, chandef); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); - if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 && + /* + * Under certain conditions suggested by the some regulatory bodies + * a GO can operate on channels marked with IEEE80211_NO_IR + * so set this flag only if such relaxations are not enabled and + * the conditions are not met. + */ + if (iftype != NL80211_IFTYPE_P2P_GO || + !cfg80211_go_permissive_chan(rdev, chandef->chan)) + prohibited_flags |= IEEE80211_CHAN_NO_IR; + + if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ prohibited_flags = IEEE80211_CHAN_DISABLED; @@ -701,6 +826,8 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, enum cfg80211_chan_mode *chanmode, u8 *radar_detect) { + int ret; + *chan = NULL; *chanmode = CHAN_MODE_UNDEFINED; @@ -743,8 +870,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *chan = wdev->chandef.chan; *chanmode = CHAN_MODE_SHARED; - if (cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef)) + ret = cfg80211_chandef_dfs_required(wdev->wiphy, + &wdev->chandef, + wdev->iftype); + WARN_ON(ret < 0); + if (ret > 0) *radar_detect |= BIT(wdev->chandef.width); } return; @@ -753,8 +883,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, *chan = wdev->chandef.chan; *chanmode = CHAN_MODE_SHARED; - if (cfg80211_chandef_dfs_required(wdev->wiphy, - &wdev->chandef)) + ret = cfg80211_chandef_dfs_required(wdev->wiphy, + &wdev->chandef, + wdev->iftype); + WARN_ON(ret < 0); + if (ret > 0) *radar_detect |= BIT(wdev->chandef.width); } return; diff --git a/net/wireless/core.c b/net/wireless/core.c index 086cddd03ba6..afee5e0455ea 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -25,7 +25,6 @@ #include "sysfs.h" #include "debugfs.h" #include "wext-compat.h" -#include "ethtool.h" #include "rdev-ops.h" /* name for sysfs, %d is appended */ @@ -69,7 +68,7 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) int get_wiphy_idx(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); return rdev->wiphy_idx; } @@ -130,7 +129,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, newname)) pr_err("failed to rename debugfs dir to %s!\n", newname); - nl80211_notify_dev_rename(rdev); + nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); return 0; } @@ -210,15 +209,12 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, } } -static int cfg80211_rfkill_set_block(void *data, bool blocked) +void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = data; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct wireless_dev *wdev; - if (!blocked) - return 0; - - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -234,7 +230,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) break; } } +} +EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces); +static int cfg80211_rfkill_set_block(void *data, bool blocked) +{ + struct cfg80211_registered_device *rdev = data; + + if (!blocked) + return 0; + + rtnl_lock(); + cfg80211_shutdown_all_interfaces(&rdev->wiphy); rtnl_unlock(); return 0; @@ -260,6 +267,45 @@ static void cfg80211_event_work(struct work_struct *work) rtnl_unlock(); } +void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_iface_destroy *item; + + ASSERT_RTNL(); + + spin_lock_irq(&rdev->destroy_list_lock); + while ((item = list_first_entry_or_null(&rdev->destroy_list, + struct cfg80211_iface_destroy, + list))) { + struct wireless_dev *wdev, *tmp; + u32 nlportid = item->nlportid; + + list_del(&item->list); + kfree(item); + spin_unlock_irq(&rdev->destroy_list_lock); + + list_for_each_entry_safe(wdev, tmp, &rdev->wdev_list, list) { + if (nlportid == wdev->owner_nlportid) + rdev_del_virtual_intf(rdev, wdev); + } + + spin_lock_irq(&rdev->destroy_list_lock); + } + spin_unlock_irq(&rdev->destroy_list_lock); +} + +static void cfg80211_destroy_iface_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + destroy_work); + + rtnl_lock(); + cfg80211_destroy_ifaces(rdev); + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) @@ -318,6 +364,10 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; + INIT_LIST_HEAD(&rdev->destroy_list); + spin_lock_init(&rdev->destroy_list_lock); + INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); + #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; #endif @@ -351,6 +401,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; + rdev->wiphy.max_num_csa_counters = 1; + return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new); @@ -396,10 +448,7 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) for (j = 0; j < c->n_limits; j++) { u16 types = c->limits[j].types; - /* - * interface types shouldn't overlap, this is - * used in cfg80211_can_change_interface() - */ + /* interface types shouldn't overlap */ if (WARN_ON(types & all_iftypes)) return -EINVAL; all_iftypes |= types; @@ -435,7 +484,7 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) int wiphy_register(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); int res; enum ieee80211_band band; struct ieee80211_supported_band *sband; @@ -610,13 +659,15 @@ int wiphy_register(struct wiphy *wiphy) return res; } + nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); + return 0; } EXPORT_SYMBOL(wiphy_register); void wiphy_rfkill_start_polling(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (!rdev->ops->rfkill_poll) return; @@ -627,7 +678,7 @@ EXPORT_SYMBOL(wiphy_rfkill_start_polling); void wiphy_rfkill_stop_polling(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); rfkill_pause_polling(rdev->rfkill); } @@ -635,7 +686,7 @@ EXPORT_SYMBOL(wiphy_rfkill_stop_polling); void wiphy_unregister(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); wait_event(rdev->dev_wait, ({ int __count; @@ -648,9 +699,10 @@ void wiphy_unregister(struct wiphy *wiphy) rfkill_unregister(rdev->rfkill); rtnl_lock(); + nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); rdev->wiphy.registered = false; - BUG_ON(!list_empty(&rdev->wdev_list)); + WARN_ON(!list_empty(&rdev->wdev_list)); /* * First remove the hardware from everywhere, this makes @@ -675,6 +727,7 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); + flush_work(&rdev->destroy_work); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -707,7 +760,7 @@ EXPORT_SYMBOL(wiphy_free); void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (rfkill_set_hw_state(rdev->rfkill, blocked)) schedule_work(&rdev->rfkill_sync); @@ -716,7 +769,7 @@ EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); void cfg80211_unregister_wdev(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); ASSERT_RTNL(); @@ -751,23 +804,23 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } -void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void __cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; ASSERT_RTNL(); + ASSERT_WDEV_LOCK(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - cfg80211_leave_ibss(rdev, dev, true); + __cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) __cfg80211_stop_sched_scan(rdev, false); - wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); wdev->wext.ie = NULL; @@ -776,32 +829,60 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, #endif cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); - wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: - cfg80211_leave_mesh(rdev, dev); + __cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - cfg80211_stop_ap(rdev, dev, true); + __cfg80211_stop_ap(rdev, dev, true); break; default: break; } } +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + wdev_lock(wdev); + __cfg80211_leave(rdev, wdev); + wdev_unlock(wdev); +} + +void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + trace_cfg80211_stop_iface(wiphy, wdev); + + ev = kzalloc(sizeof(*ev), gfp); + if (!ev) + return; + + ev->type = EVENT_STOPPED; + + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + queue_work(cfg80211_wq, &rdev->event_work); +} +EXPORT_SYMBOL(cfg80211_stop_iface); + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; - int ret; if (!wdev) return NOTIFY_DONE; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); WARN_ON(wdev->iftype == NL80211_IFTYPE_UNSPECIFIED); @@ -845,8 +926,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; - netdev_set_default_ethtool_ops(dev, &cfg80211_ethtool_ops); - if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) @@ -959,13 +1038,14 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) return notifier_from_errno(-EOPNOTSUPP); - ret = cfg80211_can_add_interface(rdev, wdev->iftype); - if (ret) - return notifier_from_errno(ret); + if (rfkill_blocked(rdev->rfkill)) + return notifier_from_errno(-ERFKILL); break; + default: + return NOTIFY_DONE; } - return NOTIFY_DONE; + return NOTIFY_OK; } static struct notifier_block cfg80211_netdev_notifier = { diff --git a/net/wireless/core.h b/net/wireless/core.h index 5b1fdcadd469..7e3a3cef7df9 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -80,13 +80,17 @@ struct cfg80211_registered_device { struct cfg80211_coalesce *coalesce; + spinlock_t destroy_list_lock; + struct list_head destroy_list; + struct work_struct destroy_work; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline -struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) +struct cfg80211_registered_device *wiphy_to_rdev(struct wiphy *wiphy) { BUG_ON(!wiphy); return container_of(wiphy, struct cfg80211_registered_device, wiphy); @@ -181,6 +185,7 @@ enum cfg80211_event_type { EVENT_ROAMED, EVENT_DISCONNECTED, EVENT_IBSS_JOINED, + EVENT_STOPPED, }; struct cfg80211_event { @@ -232,6 +237,13 @@ struct cfg80211_beacon_registration { u32 nlportid; }; +struct cfg80211_iface_destroy { + struct list_head list; + u32 nlportid; +}; + +void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); + /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); @@ -240,8 +252,8 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, void ieee80211_set_bitrate_flags(struct wiphy *wiphy); -void cfg80211_bss_expire(struct cfg80211_registered_device *dev); -void cfg80211_bss_age(struct cfg80211_registered_device *dev, +void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); +void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); /* IBSS */ @@ -270,6 +282,8 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); +int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, @@ -277,6 +291,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); /* AP */ +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, bool notify); @@ -401,35 +417,6 @@ unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); -static inline int -cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - enum nl80211_iftype iftype) -{ - return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED, 0); -} - -static inline int -cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, - enum nl80211_iftype iftype) -{ - if (rfkill_blocked(rdev->rfkill)) - return -ERFKILL; - - return cfg80211_can_change_interface(rdev, NULL, iftype); -} - -static inline int -cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) -{ - return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode, 0); -} - static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { unsigned long end = jiffies; @@ -437,7 +424,7 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start) if (end >= start) return jiffies_to_msecs(end - start); - return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); + return jiffies_to_msecs(end + (ULONG_MAX - start) + 1); } void @@ -459,6 +446,8 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void __cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index e37862f1b127..e9e91298c70d 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -1,11 +1,9 @@ #include <linux/utsname.h> #include <net/cfg80211.h> #include "core.h" -#include "ethtool.h" #include "rdev-ops.h" -static void cfg80211_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) +void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -23,84 +21,4 @@ static void cfg80211_get_drvinfo(struct net_device *dev, strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)), sizeof(info->bus_info)); } - -static int cfg80211_get_regs_len(struct net_device *dev) -{ - /* For now, return 0... */ - return 0; -} - -static void cfg80211_get_regs(struct net_device *dev, struct ethtool_regs *regs, - void *data) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - regs->version = wdev->wiphy->hw_version; - regs->len = 0; -} - -static void cfg80211_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *rp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - - memset(rp, 0, sizeof(*rp)); - - if (rdev->ops->get_ringparam) - rdev_get_ringparam(rdev, &rp->tx_pending, &rp->tx_max_pending, - &rp->rx_pending, &rp->rx_max_pending); -} - -static int cfg80211_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *rp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - - if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) - return -EINVAL; - - if (rdev->ops->set_ringparam) - return rdev_set_ringparam(rdev, rp->tx_pending, rp->rx_pending); - - return -ENOTSUPP; -} - -static int cfg80211_get_sset_count(struct net_device *dev, int sset) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - if (rdev->ops->get_et_sset_count) - return rdev_get_et_sset_count(rdev, dev, sset); - return -EOPNOTSUPP; -} - -static void cfg80211_get_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - if (rdev->ops->get_et_stats) - rdev_get_et_stats(rdev, dev, stats, data); -} - -static void cfg80211_get_strings(struct net_device *dev, u32 sset, u8 *data) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - if (rdev->ops->get_et_strings) - rdev_get_et_strings(rdev, dev, sset, data); -} - -const struct ethtool_ops cfg80211_ethtool_ops = { - .get_drvinfo = cfg80211_get_drvinfo, - .get_regs_len = cfg80211_get_regs_len, - .get_regs = cfg80211_get_regs, - .get_link = ethtool_op_get_link, - .get_ringparam = cfg80211_get_ringparam, - .set_ringparam = cfg80211_set_ringparam, - .get_strings = cfg80211_get_strings, - .get_ethtool_stats = cfg80211_get_stats, - .get_sset_count = cfg80211_get_sset_count, -}; +EXPORT_SYMBOL(cfg80211_get_drvinfo); diff --git a/net/wireless/ethtool.h b/net/wireless/ethtool.h deleted file mode 100644 index 695ecad20bd6..000000000000 --- a/net/wireless/ethtool.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __CFG80211_ETHTOOL__ -#define __CFG80211_ETHTOOL__ - -extern const struct ethtool_ops cfg80211_ethtool_ops; - -#endif /* __CFG80211_ETHTOOL__ */ diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index b35da8dc85de..baf2426b555a 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -51,42 +51,41 @@ function parse_country_head() { function parse_reg_rule() { + flag_starts_at = 7 + start = $1 sub(/\(/, "", start) end = $3 bw = $5 sub(/\),/, "", bw) - gain = $6 - sub(/\(/, "", gain) - sub(/,/, "", gain) - power = $7 - sub(/\)/, "", power) - sub(/,/, "", power) + gain = 0 + power = $6 # power might be in mW... - units = $8 + units = $7 + dfs_cac = 0 + + sub(/\(/, "", power) + sub(/\),/, "", power) + sub(/\),/, "", units) sub(/\)/, "", units) - sub(/,/, "", units) - dfs_cac = $9 + if (units == "mW") { - if (power == 100) { - power = 20 - } else if (power == 200) { - power = 23 - } else if (power == 500) { - power = 27 - } else if (power == 1000) { - power = 30 - } else { - print "Unknown power value in database!" + flag_starts_at = 8 + power = 10 * log(power)/log(10) + if ($8 ~ /[[:digit:]]/) { + flag_starts_at = 9 + dfs_cac = $8 } } else { - dfs_cac = $8 + if ($7 ~ /[[:digit:]]/) { + flag_starts_at = 8 + dfs_cac = $7 + } } - sub(/,/, "", dfs_cac) sub(/\(/, "", dfs_cac) - sub(/\)/, "", dfs_cac) + sub(/\),/, "", dfs_cac) flagstr = "" - for (i=8; i<=NF; i++) + for (i=flag_starts_at; i<=NF; i++) flagstr = flagstr $i split(flagstr, flagarray, ",") flags = "" @@ -117,7 +116,7 @@ function parse_reg_rule() } flags = flags "0" - printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags + printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags rules++ } diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a6b5bdad039c..8f345da3ea5f 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -45,7 +45,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, cfg80211_upload_connect_keys(wdev); - nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, + nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid, GFP_KERNEL); #ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); @@ -58,7 +58,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -88,8 +88,6 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct ieee80211_channel *check_chan; - u8 radar_detect_width = 0; int err; ASSERT_WDEV_LOCK(wdev); @@ -126,28 +124,6 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.chandef = params->chandef; #endif - check_chan = params->chandef.chan; - if (params->userspace_handles_dfs) { - /* Check for radar even if the current channel is not - * a radar channel - it might decide to change to DFS - * channel later. - */ - radar_detect_width = BIT(params->chandef.width); - } - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - check_chan, - (params->channel_fixed && - !radar_detect_width) - ? CHAN_MODE_SHARED - : CHAN_MODE_EXCLUSIVE, - radar_detect_width); - - if (err) { - wdev->connect_keys = NULL; - return err; - } - err = rdev_join_ibss(rdev, dev, params); if (err) { wdev->connect_keys = NULL; @@ -180,7 +156,7 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; ASSERT_WDEV_LOCK(wdev); @@ -335,7 +311,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_channel *chan = NULL; int err, freq; @@ -346,7 +322,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (!rdev->ops->join_ibss) return -EOPNOTSUPP; - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; @@ -420,7 +396,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); size_t len = data->length; int err; @@ -444,8 +420,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; + memcpy(wdev->ssid, ssid, len); wdev->wext.ibss.ssid = wdev->ssid; - memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; wdev_lock(wdev); @@ -487,7 +463,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *bssid = ap_addr->sa_data; int err; @@ -505,6 +481,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; + if (bssid && !is_valid_ether_addr(bssid)) + return -EINVAL; + /* both automatic */ if (!bssid && !wdev->wext.ibss.bssid) return 0; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 5af5cc6b2c4c..092300b30c37 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -99,7 +99,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - u8 radar_detect_width = 0; int err; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -175,22 +174,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, scan_width); } - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef, + NL80211_IFTYPE_MESH_POINT)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef); - if (err < 0) - return err; - if (err) - radar_detect_width = BIT(setup->chandef.width); - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - setup->chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) - return err; - err = rdev_join_mesh(rdev, dev, conf, setup); if (!err) { memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); @@ -236,17 +223,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return -ENETDOWN; - /* cfg80211_can_use_chan() calls - * cfg80211_can_use_iftype_chan() with no radar - * detection, so if we're trying to use a radar - * channel here, something is wrong. - */ - WARN_ON_ONCE(chandef->chan->flags & IEEE80211_CHAN_RADAR); - err = cfg80211_can_use_chan(rdev, wdev, chandef->chan, - CHAN_MODE_SHARED); - if (err) - return err; - err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev, chandef->chan); if (!err) @@ -262,8 +238,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, return 0; } -static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c52ff59a3e96..266766b8d80b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -23,7 +23,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u8 *ie = mgmt->u.assoc_resp.variable; int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); @@ -54,7 +54,7 @@ EXPORT_SYMBOL(cfg80211_rx_assoc_resp); static void cfg80211_process_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL); cfg80211_sme_rx_auth(wdev, buf, len); @@ -63,7 +63,7 @@ static void cfg80211_process_auth(struct wireless_dev *wdev, static void cfg80211_process_deauth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); @@ -82,7 +82,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev, static void cfg80211_process_disassoc(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -123,7 +123,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_send_auth_timeout(dev, addr); @@ -136,7 +136,7 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_send_assoc_timeout(dev, bss->bssid); @@ -172,7 +172,7 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, const u8 *tsc, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; char *buf = kmalloc(128, gfp); @@ -233,14 +233,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, if (!req.bss) return -ENOENT; - err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel, - CHAN_MODE_SHARED); - if (err) - goto out; - err = rdev_auth(rdev, dev, &req); -out: cfg80211_put_bss(&rdev->wiphy, req.bss); return err; } @@ -306,16 +300,10 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, if (!req->bss) return -ENOENT; - err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); - if (err) - goto out; - err = rdev_assoc(rdev, dev, req); if (!err) cfg80211_hold_bss(bss_from_pub(req->bss)); - -out: - if (err) + else cfg80211_put_bss(&rdev->wiphy, req->bss); return err; @@ -414,7 +402,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, int match_len) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; @@ -473,7 +461,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *tmp; spin_lock_bh(&wdev->mgmt_registrations_lock); @@ -620,7 +608,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, const u8 *buf, size_t len, u32 flags, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg; const struct ieee80211_txrx_stypes *stypes = &wiphy->mgmt_stypes[wdev->iftype]; @@ -739,7 +727,7 @@ void cfg80211_radar_event(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long timeout; trace_cfg80211_radar_event(wiphy, chandef); @@ -764,7 +752,7 @@ void cfg80211_cac_event(struct net_device *netdev, { struct wireless_dev *wdev = netdev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long timeout; trace_cfg80211_cac_event(netdev, event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 052c1bf8ffac..df7b1332a1ec 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -168,8 +168,8 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) netdev = __dev_get_by_index(netns, ifindex); if (netdev) { if (netdev->ieee80211_ptr) - tmp = wiphy_to_dev( - netdev->ieee80211_ptr->wiphy); + tmp = wiphy_to_rdev( + netdev->ieee80211_ptr->wiphy); else tmp = NULL; @@ -337,6 +337,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 }, [NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG }, + [NL80211_ATTR_TDLS_INITIATOR] = { .type = NLA_FLAG }, [NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG }, [NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, @@ -371,8 +372,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED }, - [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 }, - [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 }, + [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY }, + [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY }, [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, @@ -385,6 +386,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 }, [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 }, + [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, + [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, }; /* policy for the key attributes */ @@ -484,7 +487,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, err = PTR_ERR(*wdev); goto out_unlock; } - *rdev = wiphy_to_dev((*wdev)->wiphy); + *rdev = wiphy_to_rdev((*wdev)->wiphy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; @@ -497,7 +500,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, err = -ENODEV; goto out_unlock; } - *rdev = wiphy_to_dev(wiphy); + *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { @@ -566,6 +569,13 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct ieee80211_channel *chan, bool large) { + /* Some channels must be completely excluded from the + * list to protect old user-space tools from breaking + */ + if (!large && chan->flags & + (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ)) + return 0; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ, chan->center_freq)) goto nla_put_failure; @@ -613,6 +623,18 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -950,8 +972,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, c->max_interfaces)) goto nla_put_failure; if (large && - nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, - c->radar_detect_widths)) + (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths) || + nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + c->radar_detect_regions))) goto nla_put_failure; nla_nest_end(msg, nl_combi); @@ -1006,42 +1030,42 @@ static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, } static int nl80211_send_wowlan(struct sk_buff *msg, - struct cfg80211_registered_device *dev, + struct cfg80211_registered_device *rdev, bool large) { struct nlattr *nl_wowlan; - if (!dev->wiphy.wowlan) + if (!rdev->wiphy.wowlan) return 0; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; - if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && + if (((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) return -ENOBUFS; - if (dev->wiphy.wowlan->n_patterns) { + if (rdev->wiphy.wowlan->n_patterns) { struct nl80211_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan->n_patterns, - .min_pattern_len = dev->wiphy.wowlan->pattern_min_len, - .max_pattern_len = dev->wiphy.wowlan->pattern_max_len, - .max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset, + .max_patterns = rdev->wiphy.wowlan->n_patterns, + .min_pattern_len = rdev->wiphy.wowlan->pattern_min_len, + .max_pattern_len = rdev->wiphy.wowlan->pattern_max_len, + .max_pkt_offset = rdev->wiphy.wowlan->max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, @@ -1049,7 +1073,7 @@ static int nl80211_send_wowlan(struct sk_buff *msg, return -ENOBUFS; } - if (large && nl80211_send_wowlan_tcp_caps(dev, msg)) + if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; nla_nest_end(msg, nl_wowlan); @@ -1059,19 +1083,19 @@ static int nl80211_send_wowlan(struct sk_buff *msg, #endif static int nl80211_send_coalesce(struct sk_buff *msg, - struct cfg80211_registered_device *dev) + struct cfg80211_registered_device *rdev) { struct nl80211_coalesce_rule_support rule; - if (!dev->wiphy.coalesce) + if (!rdev->wiphy.coalesce) return 0; - rule.max_rules = dev->wiphy.coalesce->n_rules; - rule.max_delay = dev->wiphy.coalesce->max_delay; - rule.pat.max_patterns = dev->wiphy.coalesce->n_patterns; - rule.pat.min_pattern_len = dev->wiphy.coalesce->pattern_min_len; - rule.pat.max_pattern_len = dev->wiphy.coalesce->pattern_max_len; - rule.pat.max_pkt_offset = dev->wiphy.coalesce->max_pkt_offset; + rule.max_rules = rdev->wiphy.coalesce->n_rules; + rule.max_delay = rdev->wiphy.coalesce->max_delay; + rule.pat.max_patterns = rdev->wiphy.coalesce->n_patterns; + rule.pat.min_pattern_len = rdev->wiphy.coalesce->pattern_min_len; + rule.pat.max_pattern_len = rdev->wiphy.coalesce->pattern_max_len; + rule.pat.max_pkt_offset = rdev->wiphy.coalesce->max_pkt_offset; if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule)) return -ENOBUFS; @@ -1202,7 +1226,8 @@ struct nl80211_dump_wiphy_state { bool split; }; -static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, +static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct nl80211_dump_wiphy_state *state) { @@ -1214,63 +1239,66 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, struct ieee80211_channel *chan; int i; const struct ieee80211_txrx_stypes *mgmt_stypes = - dev->wiphy.mgmt_stypes; + rdev->wiphy.mgmt_stypes; u32 features; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (WARN_ON(!state)) return -EINVAL; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, - wiphy_name(&dev->wiphy)) || + wiphy_name(&rdev->wiphy)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, cfg80211_rdev_list_generation)) goto nla_put_failure; + if (cmd != NL80211_CMD_NEW_WIPHY) + goto finish; + switch (state->split_start) { case 0: if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, - dev->wiphy.retry_short) || + rdev->wiphy.retry_short) || nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, - dev->wiphy.retry_long) || + rdev->wiphy.retry_long) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, - dev->wiphy.frag_threshold) || + rdev->wiphy.frag_threshold) || nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, - dev->wiphy.rts_threshold) || + rdev->wiphy.rts_threshold) || nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, - dev->wiphy.coverage_class) || + rdev->wiphy.coverage_class) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, - dev->wiphy.max_scan_ssids) || + rdev->wiphy.max_scan_ssids) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, - dev->wiphy.max_sched_scan_ssids) || + rdev->wiphy.max_sched_scan_ssids) || nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, - dev->wiphy.max_scan_ie_len) || + rdev->wiphy.max_scan_ie_len) || nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, - dev->wiphy.max_sched_scan_ie_len) || + rdev->wiphy.max_sched_scan_ie_len) || nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, - dev->wiphy.max_match_sets)) + rdev->wiphy.max_match_sets)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && + if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && + if ((rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && + if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && + if ((rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; state->split_start++; @@ -1278,35 +1306,35 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 1: if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, - sizeof(u32) * dev->wiphy.n_cipher_suites, - dev->wiphy.cipher_suites)) + sizeof(u32) * rdev->wiphy.n_cipher_suites, + rdev->wiphy.cipher_suites)) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, - dev->wiphy.max_num_pmkids)) + rdev->wiphy.max_num_pmkids)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + if ((rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, - dev->wiphy.available_antennas_tx) || + rdev->wiphy.available_antennas_tx) || nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, - dev->wiphy.available_antennas_rx)) + rdev->wiphy.available_antennas_rx)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && + if ((rdev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, - dev->wiphy.probe_resp_offload)) + rdev->wiphy.probe_resp_offload)) goto nla_put_failure; - if ((dev->wiphy.available_antennas_tx || - dev->wiphy.available_antennas_rx) && - dev->ops->get_antenna) { + if ((rdev->wiphy.available_antennas_tx || + rdev->wiphy.available_antennas_rx) && + rdev->ops->get_antenna) { u32 tx_ant = 0, rx_ant = 0; int res; - res = rdev_get_antenna(dev, &tx_ant, &rx_ant); + res = rdev_get_antenna(rdev, &tx_ant, &rx_ant); if (!res) { if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, @@ -1323,7 +1351,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 2: if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, - dev->wiphy.interface_modes)) + rdev->wiphy.interface_modes)) goto nla_put_failure; state->split_start++; if (state->split) @@ -1337,7 +1365,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; - sband = dev->wiphy.bands[band]; + sband = rdev->wiphy.bands[band]; if (!sband) continue; @@ -1414,7 +1442,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, i = 0; #define CMD(op, n) \ do { \ - if (dev->ops->op) { \ + if (rdev->ops->op) { \ i++; \ if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ goto nla_put_failure; \ @@ -1438,59 +1466,58 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); - if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { + if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - if (dev->ops->set_monitor_channel || dev->ops->start_ap || - dev->ops->join_mesh) { + if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || + rdev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) goto nla_put_failure; } CMD(set_wds_peer, SET_WDS_PEER); - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { CMD(tdls_mgmt, TDLS_MGMT); CMD(tdls_oper, TDLS_OPER); } - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) CMD(sched_scan_start, START_SCHED_SCAN); CMD(probe_client, PROBE_CLIENT); CMD(set_noack_map, SET_NOACK_MAP); - if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { i++; if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) goto nla_put_failure; } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); +#ifdef CONFIG_NL80211_TESTMODE + CMD(testmode_cmd, TESTMODE); +#endif if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); + CMD(set_qos_map, SET_QOS_MAP); } - CMD(set_qos_map, SET_QOS_MAP); - -#ifdef CONFIG_NL80211_TESTMODE - CMD(testmode_cmd, TESTMODE); -#endif - + /* add into the if now */ #undef CMD - if (dev->ops->connect || dev->ops->auth) { + if (rdev->ops->connect || rdev->ops->auth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) goto nla_put_failure; } - if (dev->ops->disconnect || dev->ops->deauth) { + if (rdev->ops->disconnect || rdev->ops->deauth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) goto nla_put_failure; @@ -1501,14 +1528,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (state->split) break; case 5: - if (dev->ops->remain_on_channel && - (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && + if (rdev->ops->remain_on_channel && + (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, - dev->wiphy.max_remain_on_channel_duration)) + rdev->wiphy.max_remain_on_channel_duration)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && + if ((rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) goto nla_put_failure; @@ -1519,7 +1546,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, break; case 6: #ifdef CONFIG_PM - if (nl80211_send_wowlan(msg, dev, state->split)) + if (nl80211_send_wowlan(msg, rdev, state->split)) goto nla_put_failure; state->split_start++; if (state->split) @@ -1529,10 +1556,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, #endif case 7: if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, - dev->wiphy.software_iftypes)) + rdev->wiphy.software_iftypes)) goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg, + if (nl80211_put_iface_combinations(&rdev->wiphy, msg, state->split)) goto nla_put_failure; @@ -1540,12 +1567,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (state->split) break; case 8: - if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && + if ((rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, - dev->wiphy.ap_sme_capa)) + rdev->wiphy.ap_sme_capa)) goto nla_put_failure; - features = dev->wiphy.features; + features = rdev->wiphy.features; /* * We can only add the per-channel limit information if the * dump is split, otherwise it makes it too big. Therefore @@ -1556,16 +1583,16 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) goto nla_put_failure; - if (dev->wiphy.ht_capa_mod_mask && + if (rdev->wiphy.ht_capa_mod_mask && nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, - sizeof(*dev->wiphy.ht_capa_mod_mask), - dev->wiphy.ht_capa_mod_mask)) + sizeof(*rdev->wiphy.ht_capa_mod_mask), + rdev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; - if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && - dev->wiphy.max_acl_mac_addrs && + if (rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + rdev->wiphy.max_acl_mac_addrs && nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, - dev->wiphy.max_acl_mac_addrs)) + rdev->wiphy.max_acl_mac_addrs)) goto nla_put_failure; /* @@ -1581,41 +1608,41 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, state->split_start++; break; case 9: - if (dev->wiphy.extended_capabilities && + if (rdev->wiphy.extended_capabilities && (nla_put(msg, NL80211_ATTR_EXT_CAPA, - dev->wiphy.extended_capabilities_len, - dev->wiphy.extended_capabilities) || + rdev->wiphy.extended_capabilities_len, + rdev->wiphy.extended_capabilities) || nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, - dev->wiphy.extended_capabilities_len, - dev->wiphy.extended_capabilities_mask))) + rdev->wiphy.extended_capabilities_len, + rdev->wiphy.extended_capabilities_mask))) goto nla_put_failure; - if (dev->wiphy.vht_capa_mod_mask && + if (rdev->wiphy.vht_capa_mod_mask && nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, - sizeof(*dev->wiphy.vht_capa_mod_mask), - dev->wiphy.vht_capa_mod_mask)) + sizeof(*rdev->wiphy.vht_capa_mod_mask), + rdev->wiphy.vht_capa_mod_mask)) goto nla_put_failure; state->split_start++; break; case 10: - if (nl80211_send_coalesce(msg, dev)) + if (nl80211_send_coalesce(msg, rdev)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) || nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ))) goto nla_put_failure; - if (dev->wiphy.max_ap_assoc_sta && + if (rdev->wiphy.max_ap_assoc_sta && nla_put_u32(msg, NL80211_ATTR_MAX_AP_ASSOC_STA, - dev->wiphy.max_ap_assoc_sta)) + rdev->wiphy.max_ap_assoc_sta)) goto nla_put_failure; state->split_start++; break; case 11: - if (dev->wiphy.n_vendor_commands) { + if (rdev->wiphy.n_vendor_commands) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; @@ -1623,15 +1650,15 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nested) goto nla_put_failure; - for (i = 0; i < dev->wiphy.n_vendor_commands; i++) { - info = &dev->wiphy.vendor_commands[i].info; + for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { + info = &rdev->wiphy.vendor_commands[i].info; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } - if (dev->wiphy.n_vendor_events) { + if (rdev->wiphy.n_vendor_events) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; @@ -1640,18 +1667,26 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nested) goto nla_put_failure; - for (i = 0; i < dev->wiphy.n_vendor_events; i++) { - info = &dev->wiphy.vendor_events[i]; + for (i = 0; i < rdev->wiphy.n_vendor_events; i++) { + info = &rdev->wiphy.vendor_events[i]; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } + state->split_start++; + break; + case 12: + if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH && + nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS, + rdev->wiphy.max_num_csa_counters)) + goto nla_put_failure; /* done */ state->split_start = 0; break; } + finish: return genlmsg_end(msg, hdr); nla_put_failure: @@ -1684,7 +1719,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, if (!netdev) return -ENODEV; if (netdev->ieee80211_ptr) { - rdev = wiphy_to_dev( + rdev = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); state->filter_wiphy = rdev->wiphy_idx; } @@ -1697,7 +1732,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; struct nl80211_dump_wiphy_state *state = (void *)cb->args[0]; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; rtnl_lock(); if (!state) { @@ -1716,17 +1751,18 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = (long)state; } - list_for_each_entry(dev, &cfg80211_rdev_list, list) { - if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (++idx <= state->start) continue; if (state->filter_wiphy != -1 && - state->filter_wiphy != dev->wiphy_idx) + state->filter_wiphy != rdev->wiphy_idx) continue; /* attempt to fit multiple wiphy data chunks into the skb */ do { - ret = nl80211_send_wiphy(dev, skb, + ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, + skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, state); @@ -1774,14 +1810,15 @@ static int nl80211_dump_wiphy_done(struct netlink_callback *cb) static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, + if (nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, msg, + info->snd_portid, info->snd_seq, 0, &state) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -1908,18 +1945,20 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, + struct net_device *dev, struct genl_info *info) { struct cfg80211_chan_def chandef; int result; enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; + struct wireless_dev *wdev = NULL; - if (wdev) - iftype = wdev->iftype; - + if (dev) + wdev = dev->ieee80211_ptr; if (!nl80211_can_set_dev_channel(wdev)) return -EOPNOTSUPP; + if (wdev) + iftype = wdev->iftype; result = nl80211_parse_chandef(rdev, info, &chandef); if (result) @@ -1928,14 +1967,27 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { - result = -EBUSY; - break; - } - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef)) { + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) { result = -EINVAL; break; } + if (wdev->beacon_interval) { + if (!dev || !rdev->ops->set_ap_chanwidth || + !(rdev->wiphy.features & + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) { + result = -EBUSY; + break; + } + + /* Only allow dynamic channel width changes */ + if (chandef.chan != wdev->preset_chandef.chan) { + result = -EBUSY; + break; + } + result = rdev_set_ap_chanwidth(rdev, dev, &chandef); + if (result) + break; + } wdev->preset_chandef = chandef; result = 0; break; @@ -1957,7 +2009,7 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *netdev = info->user_ptr[1]; - return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info); + return __nl80211_set_channel(rdev, netdev, info); } static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info) @@ -2013,7 +2065,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev && netdev->ieee80211_ptr) - rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); + rdev = wiphy_to_rdev(netdev->ieee80211_ptr->wiphy); else netdev = NULL; } @@ -2079,9 +2131,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - result = __nl80211_set_channel(rdev, - nl80211_can_set_dev_channel(wdev) ? wdev : NULL, - info); + result = __nl80211_set_channel( + rdev, + nl80211_can_set_dev_channel(wdev) ? netdev : NULL, + info); if (result) return result; } @@ -2229,7 +2282,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) static inline u64 wdev_id(struct wireless_dev *wdev) { return (u64)wdev->identifier | - ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); + ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32); } static int nl80211_send_chandef(struct sk_buff *msg, @@ -2355,7 +2408,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -2363,7 +2416,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, - dev, wdev) < 0) { + rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -2514,6 +2567,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; + /* to avoid failing a new interface creation due to pending removal */ + cfg80211_destroy_ifaces(rdev); + memset(¶ms, 0, sizeof(params)); if (!info->attrs[NL80211_ATTR_IFNAME]) @@ -2563,6 +2619,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(wdev); } + if (info->attrs[NL80211_ATTR_IFACE_SOCKET_OWNER]) + wdev->owner_nlportid = info->snd_portid; + switch (type) { case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) @@ -3142,7 +3201,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; - u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -3258,24 +3316,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } else if (!nl80211_get_ap_channel(rdev, ¶ms)) return -EINVAL; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; - err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); - if (err < 0) - return err; - if (err) { - radar_detect_width = BIT(params.chandef.width); - params.radar_required = true; - } - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - params.chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) - return err; - if (info->attrs[NL80211_ATTR_ACL_POLICY]) { params.acl = parse_acl_data(&rdev->wiphy, info); if (IS_ERR(params.acl)) @@ -3613,6 +3657,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED, sinfo->tx_failed)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) && + nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT, + sinfo->expected_throughput)) + goto nla_put_failure; if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) && nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) @@ -3675,13 +3723,13 @@ static int nl80211_dump_station(struct sk_buff *skb, struct netlink_callback *cb) { struct station_info sinfo; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN]; int sta_idx = cb->args[2]; int err; - err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) return err; @@ -3690,14 +3738,14 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; } - if (!dev->ops->dump_station) { + if (!rdev->ops->dump_station) { err = -EOPNOTSUPP; goto out_err; } while (1) { memset(&sinfo, 0, sizeof(sinfo)); - err = rdev_dump_station(dev, wdev->netdev, sta_idx, + err = rdev_dump_station(rdev, wdev->netdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) break; @@ -3707,7 +3755,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (nl80211_send_station(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev, wdev->netdev, mac_addr, + rdev, wdev->netdev, mac_addr, &sinfo) < 0) goto out; @@ -3719,7 +3767,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return err; } @@ -3766,7 +3814,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy, { if (params->listen_interval != -1) return -EINVAL; - if (params->aid) + if (params->aid && + !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; /* When you run into this, adjust the code below for the new flag */ @@ -4380,18 +4429,18 @@ static int nl80211_dump_mpath(struct sk_buff *skb, struct netlink_callback *cb) { struct mpath_info pinfo; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 dst[ETH_ALEN]; u8 next_hop[ETH_ALEN]; int path_idx = cb->args[2]; int err; - err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) return err; - if (!dev->ops->dump_mpath) { + if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; goto out_err; } @@ -4402,7 +4451,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, } while (1) { - err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst, + err = rdev_dump_mpath(rdev, wdev->netdev, path_idx, dst, next_hop, &pinfo); if (err == -ENOENT) break; @@ -4423,7 +4472,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return err; } @@ -4663,7 +4712,6 @@ static int parse_reg_rule(struct nlattr *tb[], static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { - int r; char *data = NULL; enum nl80211_user_reg_hint_type user_reg_hint_type; @@ -4676,11 +4724,6 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) - return -EINVAL; - - data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); - if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]) user_reg_hint_type = nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]); @@ -4690,14 +4733,16 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) switch (user_reg_hint_type) { case NL80211_USER_REG_HINT_USER: case NL80211_USER_REG_HINT_CELL_BASE: - break; + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + return regulatory_hint_user(data, user_reg_hint_type); + case NL80211_USER_REG_HINT_INDOOR: + return regulatory_hint_indoor_user(); default: return -EINVAL; } - - r = regulatory_hint_user(data, user_reg_hint_type); - - return r; } static int nl80211_get_mesh_config(struct sk_buff *skb, @@ -5796,7 +5841,8 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (wdev->cac_started) return -EBUSY; - err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef, + wdev->iftype); if (err < 0) return err; @@ -5809,12 +5855,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chandef.chan, CHAN_MODE_SHARED, - BIT(chandef.width)); - if (err) - return err; - cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; @@ -5843,6 +5883,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) u8 radar_detect_width = 0; int err; bool need_new_beacon = false; + int len, i; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) @@ -5901,26 +5942,55 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]) return -EINVAL; - params.counter_offset_beacon = - nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); - if (params.counter_offset_beacon >= params.beacon_csa.tail_len) + len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); + if (!len || (len % sizeof(u16))) return -EINVAL; - /* sanity check - counters should be the same */ - if (params.beacon_csa.tail[params.counter_offset_beacon] != - params.count) + params.n_counter_offsets_beacon = len / sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (params.n_counter_offsets_beacon > + rdev->wiphy.max_num_csa_counters)) return -EINVAL; + params.counter_offsets_beacon = + nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < params.n_counter_offsets_beacon; i++) { + u16 offset = params.counter_offsets_beacon[i]; + + if (offset >= params.beacon_csa.tail_len) + return -EINVAL; + + if (params.beacon_csa.tail[offset] != params.count) + return -EINVAL; + } + if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) { - params.counter_offset_presp = - nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); - if (params.counter_offset_presp >= - params.beacon_csa.probe_resp_len) + len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); + if (!len || (len % sizeof(u16))) return -EINVAL; - if (params.beacon_csa.probe_resp[params.counter_offset_presp] != - params.count) + params.n_counter_offsets_presp = len / sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (params.n_counter_offsets_beacon > + rdev->wiphy.max_num_csa_counters)) return -EINVAL; + + params.counter_offsets_presp = + nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < params.n_counter_offsets_presp; i++) { + u16 offset = params.counter_offsets_presp[i]; + + if (offset >= params.beacon_csa.probe_resp_len) + return -EINVAL; + + if (params.beacon_csa.probe_resp[offset] != + params.count) + return -EINVAL; + } } skip_beacons: @@ -5928,34 +5998,21 @@ skip_beacons: if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef, + wdev->iftype)) return -EINVAL; - switch (dev->ieee80211_ptr->iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - err = cfg80211_chandef_dfs_required(wdev->wiphy, - ¶ms.chandef); - if (err < 0) - return err; - if (err) { - radar_detect_width = BIT(params.chandef.width); - params.radar_required = true; - } - break; - default: - break; - } - - err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - params.chandef.chan, - CHAN_MODE_SHARED, - radar_detect_width); - if (err) + err = cfg80211_chandef_dfs_required(wdev->wiphy, + ¶ms.chandef, + wdev->iftype); + if (err < 0) return err; + if (err > 0) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; + } + if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX]) params.block_tx = true; @@ -6175,12 +6232,12 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { struct survey_info survey; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; int survey_idx = cb->args[2]; int res; - res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev); + res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) return res; @@ -6189,7 +6246,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, goto out_err; } - if (!dev->ops->dump_survey) { + if (!rdev->ops->dump_survey) { res = -EOPNOTSUPP; goto out_err; } @@ -6197,7 +6254,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, while (1) { struct ieee80211_channel *chan; - res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey); + res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey); if (res == -ENOENT) break; if (res) @@ -6209,7 +6266,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, goto out; } - chan = ieee80211_get_channel(&dev->wiphy, + chan = ieee80211_get_channel(&rdev->wiphy, survey.channel->center_freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) { survey_idx++; @@ -6228,7 +6285,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_wdev_dump(dev); + nl80211_finish_wdev_dump(rdev); return res; } @@ -6704,7 +6761,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef, + NL80211_IFTYPE_ADHOC)) return -EINVAL; switch (ibss.chandef.width) { @@ -6879,7 +6937,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, int vendor_event_idx, int approxlen, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct nl80211_vendor_cmd_info *info; switch (cmd) { @@ -7297,6 +7355,7 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) u32 peer_capability = 0; u16 status_code; u8 *peer; + bool initiator; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || !rdev->ops->tdls_mgmt) @@ -7313,12 +7372,14 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]); status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]); + initiator = nla_get_flag(info->attrs[NL80211_ATTR_TDLS_INITIATOR]); if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]) peer_capability = nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]); return rdev_tdls_mgmt(rdev, dev, peer, action_code, dialog_token, status_code, peer_capability, + initiator, nla_data(info->attrs[NL80211_ATTR_IE]), nla_len(info->attrs[NL80211_ATTR_IE])); } @@ -7767,6 +7828,27 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!chandef.chan && params.offchan) return -EINVAL; + params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); + params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); + + if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) { + int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); + int i; + + if (len % sizeof(u16)) + return -EINVAL; + + params.n_csa_offsets = len / sizeof(u16); + params.csa_offsets = + nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); + + /* check that all the offsets fit the frame */ + for (i = 0; i < params.n_csa_offsets; i++) { + if (params.csa_offsets[i] >= params.len) + return -EINVAL; + } + } + if (!params.dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -7780,8 +7862,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); - params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); params.chan = chandef.chan; err = cfg80211_mlme_mgmt_tx(rdev, wdev, ¶ms, &cookie); if (err) @@ -8478,6 +8558,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], rem) { + u8 *mask_pat; + nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), nla_len(pat), NULL); err = -EINVAL; @@ -8501,19 +8583,18 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; new_triggers.patterns[i].pkt_offset = pkt_offset; - new_triggers.patterns[i].mask = - kmalloc(mask_len + pat_len, GFP_KERNEL); - if (!new_triggers.patterns[i].mask) { + mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!mask_pat) { err = -ENOMEM; goto error; } - new_triggers.patterns[i].pattern = - new_triggers.patterns[i].mask + mask_len; - memcpy(new_triggers.patterns[i].mask, - nla_data(pat_tb[NL80211_PKTPAT_MASK]), + new_triggers.patterns[i].mask = mask_pat; + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); + mask_pat += mask_len; + new_triggers.patterns[i].pattern = mask_pat; new_triggers.patterns[i].pattern_len = pat_len; - memcpy(new_triggers.patterns[i].pattern, + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); i++; @@ -8705,6 +8786,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN], rem) { + u8 *mask_pat; + nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat), nla_len(pat), NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || @@ -8726,17 +8809,19 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, return -EINVAL; new_rule->patterns[i].pkt_offset = pkt_offset; - new_rule->patterns[i].mask = - kmalloc(mask_len + pat_len, GFP_KERNEL); - if (!new_rule->patterns[i].mask) + mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL); + if (!mask_pat) return -ENOMEM; - new_rule->patterns[i].pattern = - new_rule->patterns[i].mask + mask_len; - memcpy(new_rule->patterns[i].mask, - nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len); + + new_rule->patterns[i].mask = mask_pat; + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]), + mask_len); + + mask_pat += mask_len; + new_rule->patterns[i].pattern = mask_pat; new_rule->patterns[i].pattern_len = pat_len; - memcpy(new_rule->patterns[i].pattern, - nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len); + memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), + pat_len); i++; } @@ -8981,9 +9066,8 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->p2p_started) return 0; - err = cfg80211_can_add_interface(rdev, wdev->iftype); - if (err) - return err; + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; err = rdev_start_p2p_device(rdev, wdev); if (err) @@ -9192,7 +9276,7 @@ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, enum nl80211_attrs attr, int approxlen) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (WARN_ON(!rdev->cur_cmd_info)) return NULL; @@ -9316,7 +9400,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, } dev = wdev->netdev; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { @@ -10017,16 +10101,20 @@ static const struct genl_ops nl80211_ops[] = { /* notification functions */ -void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) +void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd) { struct sk_buff *msg; struct nl80211_dump_wiphy_state state = {}; + WARN_ON(cmd != NL80211_CMD_NEW_WIPHY && + cmd != NL80211_CMD_DEL_WIPHY); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) { + if (nl80211_send_wiphy(rdev, cmd, msg, 0, 0, 0, &state) < 0) { nlmsg_free(msg); return; } @@ -10345,7 +10433,7 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct ieee80211_mgmt *mgmt = (void *)buf; u32 cmd; @@ -10567,7 +10655,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, const u8* ie, u8 ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -10747,7 +10835,7 @@ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, unsigned int duration, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, @@ -10761,7 +10849,7 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, @@ -10773,7 +10861,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; trace_cfg80211_new_sta(dev, mac_addr, sinfo); @@ -10796,7 +10884,7 @@ EXPORT_SYMBOL(cfg80211_new_sta); void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -10833,7 +10921,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -10868,7 +10956,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, const u8 *addr, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); @@ -10988,7 +11076,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; @@ -11032,7 +11120,7 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11124,7 +11212,7 @@ void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_gtk_rekey_notify(dev, bssid); nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); @@ -11182,7 +11270,7 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); @@ -11229,7 +11317,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_WDEV_LOCK(wdev); @@ -11253,7 +11341,7 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11353,7 +11441,7 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -11400,7 +11488,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, u64 cookie, bool acked, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -11440,7 +11528,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; struct cfg80211_beacon_registration *reg; @@ -11487,7 +11575,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup, gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; int size = 200; @@ -11597,7 +11685,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, u16 reason_code, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; @@ -11649,9 +11737,15 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) + bool schedule_destroy_work = false; + + list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (wdev->owner_nlportid == notify->portid) + schedule_destroy_work = true; + } + spin_lock_bh(&rdev->beacon_registrations_lock); list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations, list) { @@ -11662,11 +11756,24 @@ static int nl80211_netlink_notify(struct notifier_block * nb, } } spin_unlock_bh(&rdev->beacon_registrations_lock); + + if (schedule_destroy_work) { + struct cfg80211_iface_destroy *destroy; + + destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC); + if (destroy) { + destroy->nlportid = notify->portid; + spin_lock(&rdev->destroy_list_lock); + list_add(&destroy->list, &rdev->destroy_list); + spin_unlock(&rdev->destroy_list_lock); + schedule_work(&rdev->destroy_work); + } + } } rcu_read_unlock(); - return NOTIFY_DONE; + return NOTIFY_OK; } static struct notifier_block nl80211_netlink_notifier = { @@ -11677,7 +11784,7 @@ void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_ft_event_params *ft_event) { struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; @@ -11724,7 +11831,7 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) void *hdr; u32 nlportid; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); if (!rdev->crit_proto_nlportid) return; @@ -11759,7 +11866,7 @@ EXPORT_SYMBOL(cfg80211_crit_proto_stopped); void nl80211_send_ap_stopped(struct wireless_dev *wdev) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 1e6df9630f42..49c9a482dd12 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -5,7 +5,8 @@ int nl80211_init(void); void nl80211_exit(void); -void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); +void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, + enum nl80211_commands cmd); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 74d97d33c938..56c2240c30ce 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -199,7 +199,7 @@ static inline int rdev_change_station(struct cfg80211_registered_device *rdev, } static inline int rdev_get_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, u8 *mac, + struct net_device *dev, const u8 *mac, struct station_info *sinfo) { int ret; @@ -714,25 +714,6 @@ static inline int rdev_get_antenna(struct cfg80211_registered_device *rdev, return ret; } -static inline int rdev_set_ringparam(struct cfg80211_registered_device *rdev, - u32 tx, u32 rx) -{ - int ret; - trace_rdev_set_ringparam(&rdev->wiphy, tx, rx); - ret = rdev->ops->set_ringparam(&rdev->wiphy, tx, rx); - trace_rdev_return_int(&rdev->wiphy, ret); - return ret; -} - -static inline void rdev_get_ringparam(struct cfg80211_registered_device *rdev, - u32 *tx, u32 *tx_max, u32 *rx, - u32 *rx_max) -{ - trace_rdev_get_ringparam(&rdev->wiphy); - rdev->ops->get_ringparam(&rdev->wiphy, tx, tx_max, rx, rx_max); - trace_rdev_return_void_tx_rx(&rdev->wiphy, *tx, *tx_max, *rx, *rx_max); -} - static inline int rdev_sched_scan_start(struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -770,15 +751,15 @@ static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, - const u8 *buf, size_t len) + bool initiator, const u8 *buf, size_t len) { int ret; trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, action_code, dialog_token, status_code, peer_capability, - buf, len); + initiator, buf, len); ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code, dialog_token, status_code, peer_capability, - buf, len); + initiator, buf, len); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -816,35 +797,6 @@ static inline int rdev_set_noack_map(struct cfg80211_registered_device *rdev, } static inline int -rdev_get_et_sset_count(struct cfg80211_registered_device *rdev, - struct net_device *dev, int sset) -{ - int ret; - trace_rdev_get_et_sset_count(&rdev->wiphy, dev, sset); - ret = rdev->ops->get_et_sset_count(&rdev->wiphy, dev, sset); - trace_rdev_return_int(&rdev->wiphy, ret); - return ret; -} - -static inline void rdev_get_et_stats(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - trace_rdev_get_et_stats(&rdev->wiphy, dev); - rdev->ops->get_et_stats(&rdev->wiphy, dev, stats, data); - trace_rdev_return_void(&rdev->wiphy); -} - -static inline void rdev_get_et_strings(struct cfg80211_registered_device *rdev, - struct net_device *dev, u32 sset, - u8 *data) -{ - trace_rdev_get_et_strings(&rdev->wiphy, dev, sset); - rdev->ops->get_et_strings(&rdev->wiphy, dev, sset, data); - trace_rdev_return_void(&rdev->wiphy); -} - -static inline int rdev_get_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef) @@ -950,4 +902,17 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef) +{ + int ret; + + trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef); + ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f59aaac586f8..1afdf45db38f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -65,11 +65,26 @@ #define REG_DBG_PRINT(args...) #endif +/** + * enum reg_request_treatment - regulatory request treatment + * + * @REG_REQ_OK: continue processing the regulatory request + * @REG_REQ_IGNORE: ignore the regulatory request + * @REG_REQ_INTERSECT: the regulatory domain resulting from this request should + * be intersected with the current one. + * @REG_REQ_ALREADY_SET: the regulatory request will not change the current + * regulatory settings, and no further processing is required. + * @REG_REQ_USER_HINT_HANDLED: a non alpha2 user hint was handled and no + * further processing is required, i.e., not need to update last_request + * etc. This should be used for user hints that do not provide an alpha2 + * but some other type of regulatory hint, i.e., indoor operation. + */ enum reg_request_treatment { REG_REQ_OK, REG_REQ_IGNORE, REG_REQ_INTERSECT, REG_REQ_ALREADY_SET, + REG_REQ_USER_HINT_HANDLED, }; static struct regulatory_request core_request_world = { @@ -106,6 +121,14 @@ const struct ieee80211_regdomain __rcu *cfg80211_regdomain; */ static int reg_num_devs_support_basehint; +/* + * State variable indicating if the platform on which the devices + * are attached is operating in an indoor environment. The state variable + * is relevant for all registered devices. + * (protected by RTNL) + */ +static bool reg_is_indoor; + static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { return rtnl_dereference(cfg80211_regdomain); @@ -240,8 +263,16 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reg_free_request(struct regulatory_request *lr) +static void reg_free_request(struct regulatory_request *request) { + if (request != get_last_request()) + kfree(request); +} + +static void reg_free_last_request(void) +{ + struct regulatory_request *lr = get_last_request(); + if (lr != &core_request_world && lr) kfree_rcu(lr, rcu_head); } @@ -254,7 +285,7 @@ static void reg_update_last_request(struct regulatory_request *request) if (lr == request) return; - reg_free_request(lr); + reg_free_last_request(); rcu_assign_pointer(last_request, request); } @@ -873,6 +904,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_RADAR; if (rd_flags & NL80211_RRF_NO_OFDM) channel_flags |= IEEE80211_CHAN_NO_OFDM; + if (rd_flags & NL80211_RRF_NO_OUTDOOR) + channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; return channel_flags; } @@ -1071,6 +1104,13 @@ static void handle_channel(struct wiphy *wiphy, (int) MBI_TO_DBI(power_rule->max_antenna_gain); chan->max_reg_power = chan->max_power = chan->orig_mpwr = (int) MBM_TO_DBM(power_rule->max_eirp); + + if (chan->flags & IEEE80211_CHAN_RADAR) { + chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; + if (reg_rule->dfs_cac_ms) + chan->dfs_cac_ms = reg_rule->dfs_cac_ms; + } + return; } @@ -1126,12 +1166,19 @@ static bool reg_request_cell_base(struct regulatory_request *request) return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE; } +static bool reg_request_indoor(struct regulatory_request *request) +{ + if (request->initiator != NL80211_REGDOM_SET_BY_USER) + return false; + return request->user_reg_hint_type == NL80211_USER_REG_HINT_INDOOR; +} + bool reg_last_request_cell_base(void) { return reg_request_cell_base(get_last_request()); } -#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS +#ifdef CONFIG_CFG80211_REG_CELLULAR_HINTS /* Core specific check */ static enum reg_request_treatment reg_ignore_cell_hint(struct regulatory_request *pending_request) @@ -1568,6 +1615,11 @@ __reg_process_hint_user(struct regulatory_request *user_request) { struct regulatory_request *lr = get_last_request(); + if (reg_request_indoor(user_request)) { + reg_is_indoor = true; + return REG_REQ_USER_HINT_HANDLED; + } + if (reg_request_cell_base(user_request)) return reg_ignore_cell_hint(user_request); @@ -1615,8 +1667,9 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) { - kfree(user_request); + treatment == REG_REQ_ALREADY_SET || + treatment == REG_REQ_USER_HINT_HANDLED) { + reg_free_request(user_request); return treatment; } @@ -1676,14 +1729,15 @@ reg_process_hint_driver(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: - kfree(driver_request); + case REG_REQ_USER_HINT_HANDLED: + reg_free_request(driver_request); return treatment; case REG_REQ_INTERSECT: /* fall through */ case REG_REQ_ALREADY_SET: regd = reg_copy_regd(get_cfg80211_regdom()); if (IS_ERR(regd)) { - kfree(driver_request); + reg_free_request(driver_request); return REG_REQ_IGNORE; } rcu_assign_pointer(wiphy->regd, regd); @@ -1775,12 +1829,13 @@ reg_process_hint_country_ie(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: + case REG_REQ_USER_HINT_HANDLED: /* fall through */ case REG_REQ_ALREADY_SET: - kfree(country_ie_request); + reg_free_request(country_ie_request); return treatment; case REG_REQ_INTERSECT: - kfree(country_ie_request); + reg_free_request(country_ie_request); /* * This doesn't happen yet, not sure we * ever want to support it for this case. @@ -1813,7 +1868,8 @@ static void reg_process_hint(struct regulatory_request *reg_request) case NL80211_REGDOM_SET_BY_USER: treatment = reg_process_hint_user(reg_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) + treatment == REG_REQ_ALREADY_SET || + treatment == REG_REQ_USER_HINT_HANDLED) return; queue_delayed_work(system_power_efficient_wq, ®_timeout, msecs_to_jiffies(3142)); @@ -1841,7 +1897,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) return; out_free: - kfree(reg_request); + reg_free_request(reg_request); } /* @@ -1857,7 +1913,7 @@ static void reg_process_pending_hints(void) /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { - REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); + reg_process_hint(lr); return; } @@ -1967,6 +2023,22 @@ int regulatory_hint_user(const char *alpha2, return 0; } +int regulatory_hint_indoor_user(void) +{ + struct regulatory_request *request; + + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->wiphy_idx = WIPHY_IDX_INVALID; + request->initiator = NL80211_REGDOM_SET_BY_USER; + request->user_reg_hint_type = NL80211_USER_REG_HINT_INDOOR; + queue_regulatory_request(request); + + return 0; +} + /* Driver hints */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { @@ -2134,6 +2206,8 @@ static void restore_regulatory_settings(bool reset_user) ASSERT_RTNL(); + reg_is_indoor = false; + reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -2594,7 +2668,7 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) reg_num_devs_support_basehint--; rcu_free_regdom(get_wiphy_regdom(wiphy)); - rcu_assign_pointer(wiphy->regd, NULL); + RCU_INIT_POINTER(wiphy->regd, NULL); if (lr) request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); @@ -2614,6 +2688,40 @@ static void reg_timeout_work(struct work_struct *work) rtnl_unlock(); } +/* + * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for + * UNII band definitions + */ +int cfg80211_get_unii(int freq) +{ + /* UNII-1 */ + if (freq >= 5150 && freq <= 5250) + return 0; + + /* UNII-2A */ + if (freq > 5250 && freq <= 5350) + return 1; + + /* UNII-2B */ + if (freq > 5350 && freq <= 5470) + return 2; + + /* UNII-2C */ + if (freq > 5470 && freq <= 5725) + return 3; + + /* UNII-3 */ + if (freq > 5725 && freq <= 5825) + return 4; + + return -EINVAL; +} + +bool regulatory_indoor_allowed(void) +{ + return reg_is_indoor; +} + int __init regulatory_init(void) { int err = 0; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 37c180df34b7..5e48031ccb9a 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -25,6 +25,7 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); +int regulatory_hint_indoor_user(void); void wiphy_regulatory_register(struct wiphy *wiphy); void wiphy_regulatory_deregister(struct wiphy *wiphy); @@ -104,4 +105,21 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, */ void regulatory_hint_disconnect(void); +/** + * cfg80211_get_unii - get the U-NII band for the frequency + * @freq: the frequency for which we want to get the UNII band. + + * Get a value specifying the U-NII band frequency belongs to. + * U-NII bands are defined by the FCC in C.F.R 47 part 15. + * + * Returns -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A, + * 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3. + */ +int cfg80211_get_unii(int freq); + +/** + * regulatory_indoor_allowed - is indoor operation allowed + */ +bool regulatory_indoor_allowed(void); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7d09a712cb1f..0798c62e6085 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -81,10 +81,10 @@ static void bss_free(struct cfg80211_internal_bss *bss) kfree(bss); } -static inline void bss_ref_get(struct cfg80211_registered_device *dev, +static inline void bss_ref_get(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); bss->refcount++; if (bss->pub.hidden_beacon_bss) { @@ -95,10 +95,10 @@ static inline void bss_ref_get(struct cfg80211_registered_device *dev, } } -static inline void bss_ref_put(struct cfg80211_registered_device *dev, +static inline void bss_ref_put(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); if (bss->pub.hidden_beacon_bss) { struct cfg80211_internal_bss *hbss; @@ -114,10 +114,10 @@ static inline void bss_ref_put(struct cfg80211_registered_device *dev, bss_free(bss); } -static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, +static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); if (!list_empty(&bss->hidden_list)) { /* @@ -134,31 +134,31 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, } list_del_init(&bss->list); - rb_erase(&bss->rbn, &dev->bss_tree); - bss_ref_put(dev, bss); + rb_erase(&bss->rbn, &rdev->bss_tree); + bss_ref_put(rdev, bss); return true; } -static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, +static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev, unsigned long expire_time) { struct cfg80211_internal_bss *bss, *tmp; bool expired = false; - lockdep_assert_held(&dev->bss_lock); + lockdep_assert_held(&rdev->bss_lock); - list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { + list_for_each_entry_safe(bss, tmp, &rdev->bss_list, list) { if (atomic_read(&bss->hold)) continue; if (!time_after(expire_time, bss->ts)) continue; - if (__cfg80211_unlink_bss(dev, bss)) + if (__cfg80211_unlink_bss(rdev, bss)) expired = true; } if (expired) - dev->bss_generation++; + rdev->bss_generation++; } void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, @@ -238,11 +238,11 @@ void __cfg80211_scan_done(struct work_struct *wk) void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) { trace_cfg80211_scan_done(request, aborted); - WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); + WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req); request->aborted = aborted; request->notified = true; - queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); + queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -278,20 +278,28 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) { trace_cfg80211_sched_scan_results(wiphy); /* ignore if we're not scanning */ - if (wiphy_to_dev(wiphy)->sched_scan_req) + if (wiphy_to_rdev(wiphy)->sched_scan_req) queue_work(cfg80211_wq, - &wiphy_to_dev(wiphy)->sched_scan_results_wk); + &wiphy_to_rdev(wiphy)->sched_scan_results_wk); } EXPORT_SYMBOL(cfg80211_sched_scan_results); -void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + + ASSERT_RTNL(); trace_cfg80211_sched_scan_stopped(wiphy); - rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); +} +EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl); + +void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +{ + rtnl_lock(); + cfg80211_sched_scan_stopped_rtnl(wiphy); rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -322,21 +330,21 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, return 0; } -void cfg80211_bss_age(struct cfg80211_registered_device *dev, +void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs) { struct cfg80211_internal_bss *bss; unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); - spin_lock_bh(&dev->bss_lock); - list_for_each_entry(bss, &dev->bss_list, list) + spin_lock_bh(&rdev->bss_lock); + list_for_each_entry(bss, &rdev->bss_list, list) bss->ts -= age_jiffies; - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); } -void cfg80211_bss_expire(struct cfg80211_registered_device *dev) +void cfg80211_bss_expire(struct cfg80211_registered_device *rdev) { - __cfg80211_bss_expire(dev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); + __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) @@ -526,32 +534,34 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, const u8 *ssid, size_t ssid_len, u16 capa_mask, u16 capa_val) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss, *res = NULL; unsigned long now = jiffies; trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, capa_mask, capa_val); - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if ((bss->pub.capability & capa_mask) != capa_val) continue; if (channel && bss->pub.channel != channel) continue; + if (!is_valid_ether_addr(bss->pub.bssid)) + continue; /* Don't get expired BSS structs */ if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) && !atomic_read(&bss->hold)) continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { res = bss; - bss_ref_get(dev, res); + bss_ref_get(rdev, res); break; } } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); if (!res) return NULL; trace_cfg80211_return_bss(&res->pub); @@ -559,10 +569,10 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_get_bss); -static void rb_insert_bss(struct cfg80211_registered_device *dev, +static void rb_insert_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { - struct rb_node **p = &dev->bss_tree.rb_node; + struct rb_node **p = &rdev->bss_tree.rb_node; struct rb_node *parent = NULL; struct cfg80211_internal_bss *tbss; int cmp; @@ -585,15 +595,15 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev, } rb_link_node(&bss->rbn, parent, p); - rb_insert_color(&bss->rbn, &dev->bss_tree); + rb_insert_color(&bss->rbn, &rdev->bss_tree); } static struct cfg80211_internal_bss * -rb_find_bss(struct cfg80211_registered_device *dev, +rb_find_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *res, enum bss_compare_mode mode) { - struct rb_node *n = dev->bss_tree.rb_node; + struct rb_node *n = rdev->bss_tree.rb_node; struct cfg80211_internal_bss *bss; int r; @@ -612,7 +622,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, return NULL; } -static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, +static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *new) { const struct cfg80211_bss_ies *ies; @@ -642,7 +652,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, /* This is the bad part ... */ - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) continue; if (bss->pub.channel != new->pub.channel) @@ -676,7 +686,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_internal_bss * -cfg80211_bss_update(struct cfg80211_registered_device *dev, +cfg80211_bss_update(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *tmp, bool signal_valid) { @@ -687,14 +697,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, tmp->ts = jiffies; - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); if (WARN_ON(!rcu_access_pointer(tmp->pub.ies))) { - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return NULL; } - found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); + found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR); if (found) { /* Update IEs */ @@ -781,7 +791,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * is allocated on the stack since it's not needed in the * more common case of an update */ - new = kzalloc(sizeof(*new) + dev->wiphy.bss_priv_size, + new = kzalloc(sizeof(*new) + rdev->wiphy.bss_priv_size, GFP_ATOMIC); if (!new) { ies = (void *)rcu_dereference(tmp->pub.beacon_ies); @@ -797,9 +807,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, INIT_LIST_HEAD(&new->hidden_list); if (rcu_access_pointer(tmp->pub.proberesp_ies)) { - hidden = rb_find_bss(dev, tmp, BSS_CMP_HIDE_ZLEN); + hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN); if (!hidden) - hidden = rb_find_bss(dev, tmp, + hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_NUL); if (hidden) { new->pub.hidden_beacon_bss = &hidden->pub; @@ -816,24 +826,24 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * expensive search for any probe responses that should * be grouped with this beacon for updates ... */ - if (!cfg80211_combine_bsses(dev, new)) { + if (!cfg80211_combine_bsses(rdev, new)) { kfree(new); goto drop; } } - list_add_tail(&new->list, &dev->bss_list); - rb_insert_bss(dev, new); + list_add_tail(&new->list, &rdev->bss_list); + rb_insert_bss(rdev, new); found = new; } - dev->bss_generation++; - bss_ref_get(dev, found); - spin_unlock_bh(&dev->bss_lock); + rdev->bss_generation++; + bss_ref_get(rdev, found); + spin_unlock_bh(&rdev->bss_lock); return found; drop: - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return NULL; } @@ -881,6 +891,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; struct cfg80211_internal_bss tmp = {}, *res; + bool signal_valid; if (WARN_ON(!wiphy)) return NULL; @@ -917,8 +928,9 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, rcu_assign_pointer(tmp.pub.beacon_ies, ies); rcu_assign_pointer(tmp.pub.ies, ies); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp, - rx_channel == channel); + signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + wiphy->max_adj_channel_rssi_comp; + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) return NULL; @@ -942,6 +954,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, struct cfg80211_internal_bss tmp = {}, *res; struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; + bool signal_valid; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); @@ -989,8 +1002,9 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp, - rx_channel == channel); + signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + wiphy->max_adj_channel_rssi_comp; + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) return NULL; @@ -1005,7 +1019,7 @@ EXPORT_SYMBOL(cfg80211_inform_bss_width_frame); void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) @@ -1013,15 +1027,15 @@ void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); - bss_ref_get(dev, bss); - spin_unlock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); + bss_ref_get(rdev, bss); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_ref_bss); void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if (!pub) @@ -1029,15 +1043,15 @@ void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); - bss_ref_put(dev, bss); - spin_unlock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); + bss_ref_put(rdev, bss); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_put_bss); void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { - struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss; if (WARN_ON(!pub)) @@ -1045,12 +1059,12 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) bss = container_of(pub, struct cfg80211_internal_bss, pub); - spin_lock_bh(&dev->bss_lock); + spin_lock_bh(&rdev->bss_lock); if (!list_empty(&bss->list)) { - if (__cfg80211_unlink_bss(dev, bss)) - dev->bss_generation++; + if (__cfg80211_unlink_bss(rdev, bss)) + rdev->bss_generation++; } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); } EXPORT_SYMBOL(cfg80211_unlink_bss); @@ -1067,7 +1081,7 @@ cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) if (!dev) return ERR_PTR(-ENODEV); if (dev->ieee80211_ptr) - rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + rdev = wiphy_to_rdev(dev->ieee80211_ptr->wiphy); else rdev = ERR_PTR(-ENODEV); dev_put(dev); @@ -1147,7 +1161,11 @@ int cfg80211_wext_siwscan(struct net_device *dev, int k; int wiphy_freq = wiphy->bands[band]->channels[j].center_freq; for (k = 0; k < wreq->num_channels; k++) { - int wext_freq = cfg80211_wext_freq(wiphy, &wreq->channel_list[k]); + struct iw_freq *freq = + &wreq->channel_list[k]; + int wext_freq = + cfg80211_wext_freq(freq); + if (wext_freq == wiphy_freq) goto wext_freq_found; } @@ -1459,7 +1477,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, } -static int ieee80211_scan_results(struct cfg80211_registered_device *dev, +static int ieee80211_scan_results(struct cfg80211_registered_device *rdev, struct iw_request_info *info, char *buf, size_t len) { @@ -1467,18 +1485,18 @@ static int ieee80211_scan_results(struct cfg80211_registered_device *dev, char *end_buf = buf + len; struct cfg80211_internal_bss *bss; - spin_lock_bh(&dev->bss_lock); - cfg80211_bss_expire(dev); + spin_lock_bh(&rdev->bss_lock); + cfg80211_bss_expire(rdev); - list_for_each_entry(bss, &dev->bss_list, list) { + list_for_each_entry(bss, &rdev->bss_list, list) { if (buf + len - current_ev <= IW_EV_ADDR_LEN) { - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return -E2BIG; } - current_ev = ieee80211_bss(&dev->wiphy, info, bss, + current_ev = ieee80211_bss(&rdev->wiphy, info, bss, current_ev, end_buf); } - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); return current_ev - buf; } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index acdcb4a81817..8bbeeb302216 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -59,7 +59,7 @@ static void cfg80211_sme_free(struct wireless_dev *wdev) static int cfg80211_conn_scan(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_scan_request *request; int n_channels, err; @@ -130,7 +130,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) static int cfg80211_conn_do_work(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_connect_params *params; struct cfg80211_assoc_request req = {}; int err; @@ -149,7 +149,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_SCAN_AGAIN: return cfg80211_conn_scan(wdev); case CFG80211_CONN_AUTHENTICATE_NEXT: - BUG_ON(!rdev->ops->auth); + if (WARN_ON(!rdev->ops->auth)) + return -EOPNOTSUPP; wdev->conn->state = CFG80211_CONN_AUTHENTICATING; return cfg80211_mlme_auth(rdev, wdev->netdev, params->channel, params->auth_type, @@ -161,7 +162,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_AUTH_FAILED: return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: - BUG_ON(!rdev->ops->assoc); + if (WARN_ON(!rdev->ops->assoc)) + return -EOPNOTSUPP; wdev->conn->state = CFG80211_CONN_ASSOCIATING; if (wdev->conn->prev_bssid_valid) req.prev_bssid = wdev->conn->prev_bssid; @@ -234,7 +236,6 @@ void cfg80211_conn_work(struct work_struct *work) NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - cfg80211_sme_free(wdev); } wdev_unlock(wdev); } @@ -245,7 +246,7 @@ void cfg80211_conn_work(struct work_struct *work) /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; u16 capa = WLAN_CAPABILITY_ESS; @@ -275,7 +276,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) static void __cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; ASSERT_WDEV_LOCK(wdev); @@ -306,7 +307,7 @@ void cfg80211_sme_scan_done(struct net_device *dev) void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u16 status_code = le16_to_cpu(mgmt->u.auth.status_code); @@ -352,7 +353,7 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return false; @@ -386,7 +387,7 @@ void cfg80211_sme_deauth(struct wireless_dev *wdev) void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -397,7 +398,7 @@ void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) void cfg80211_sme_disassoc(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -408,7 +409,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev) void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); if (!wdev->conn) return; @@ -421,7 +422,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, struct cfg80211_connect_params *connect, const u8 *prev_bssid) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; int err; @@ -468,7 +469,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, } wdev->conn->params.ssid = wdev->ssid; - wdev->conn->params.ssid_len = connect->ssid_len; + wdev->conn->params.ssid_len = wdev->ssid_len; /* see if we have the bss already */ bss = cfg80211_get_conn_bss(wdev); @@ -480,7 +481,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, /* we're good if we have a matching bss struct */ if (bss) { - wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; err = cfg80211_conn_do_work(wdev); cfg80211_put_bss(wdev->wiphy, bss); } else { @@ -506,7 +506,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err; if (!wdev->conn) @@ -594,7 +594,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, return; } - nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, + nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, status, GFP_KERNEL); @@ -625,7 +625,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, #endif if (!bss && (status == WLAN_STATUS_SUCCESS)) { - WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); + WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, @@ -648,6 +648,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wdev->wiphy, bss); } + cfg80211_sme_free(wdev); return; } @@ -687,7 +688,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, u16 status, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -742,7 +743,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bss->bssid, + nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy), + wdev->netdev, bss->bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, GFP_KERNEL); @@ -801,7 +803,7 @@ void cfg80211_roamed_bss(struct net_device *dev, size_t resp_ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; @@ -834,7 +836,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; @@ -877,10 +879,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, } void cfg80211_disconnected(struct net_device *dev, u16 reason, - u8 *ie, size_t ie_len, gfp_t gfp) + const u8 *ie, size_t ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index aabccf13e07b..0c524cd76c83 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -298,11 +298,6 @@ DEFINE_EVENT(wiphy_only_evt, rdev_return_void, TP_ARGS(wiphy) ); -DEFINE_EVENT(wiphy_only_evt, rdev_get_ringparam, - TP_PROTO(struct wiphy *wiphy), - TP_ARGS(wiphy) -); - DEFINE_EVENT(wiphy_only_evt, rdev_get_antenna, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) @@ -580,11 +575,6 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_stop_ap, TP_ARGS(wiphy, netdev) ); -DEFINE_EVENT(wiphy_netdev_evt, rdev_get_et_stats, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), - TP_ARGS(wiphy, netdev) -); - DEFINE_EVENT(wiphy_netdev_evt, rdev_sched_scan_stop, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) @@ -1439,11 +1429,6 @@ DECLARE_EVENT_CLASS(tx_rx_evt, WIPHY_PR_ARG, __entry->tx, __entry->rx) ); -DEFINE_EVENT(tx_rx_evt, rdev_set_ringparam, - TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, rx, tx) -); - DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), TP_ARGS(wiphy, rx, tx) @@ -1469,9 +1454,9 @@ TRACE_EVENT(rdev_tdls_mgmt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, - const u8 *buf, size_t len), + bool initiator, const u8 *buf, size_t len), TP_ARGS(wiphy, netdev, peer, action_code, dialog_token, status_code, - peer_capability, buf, len), + peer_capability, initiator, buf, len), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY @@ -1480,6 +1465,7 @@ TRACE_EVENT(rdev_tdls_mgmt, __field(u8, dialog_token) __field(u16, status_code) __field(u32, peer_capability) + __field(bool, initiator) __dynamic_array(u8, buf, len) ), TP_fast_assign( @@ -1490,13 +1476,16 @@ TRACE_EVENT(rdev_tdls_mgmt, __entry->dialog_token = dialog_token; __entry->status_code = status_code; __entry->peer_capability = peer_capability; + __entry->initiator = initiator; memcpy(__get_dynamic_array(buf), buf, len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", action_code: %u, " - "dialog_token: %u, status_code: %u, peer_capability: %u buf: %#.2x ", + "dialog_token: %u, status_code: %u, peer_capability: %u " + "initiator: %s buf: %#.2x ", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->action_code, __entry->dialog_token, __entry->status_code, __entry->peer_capability, + BOOL_TO_STR(__entry->initiator), ((u8 *)__get_dynamic_array(buf))[0]) ); @@ -1725,40 +1714,6 @@ TRACE_EVENT(rdev_set_noack_map, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map) ); -TRACE_EVENT(rdev_get_et_sset_count, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int sset), - TP_ARGS(wiphy, netdev, sset), - TP_STRUCT__entry( - WIPHY_ENTRY - NETDEV_ENTRY - __field(int, sset) - ), - TP_fast_assign( - WIPHY_ASSIGN; - NETDEV_ASSIGN; - __entry->sset = sset; - ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", sset: %d", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sset) -); - -TRACE_EVENT(rdev_get_et_strings, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 sset), - TP_ARGS(wiphy, netdev, sset), - TP_STRUCT__entry( - WIPHY_ENTRY - NETDEV_ENTRY - __field(u32, sset) - ), - TP_fast_assign( - WIPHY_ASSIGN; - NETDEV_ASSIGN; - __entry->sset = sset; - ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", sset: %u", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sset) -); - DEFINE_EVENT(wiphy_wdev_evt, rdev_get_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) @@ -1876,29 +1831,33 @@ TRACE_EVENT(rdev_channel_switch, WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY - __field(u16, counter_offset_beacon) - __field(u16, counter_offset_presp) __field(bool, radar_required) __field(bool, block_tx) __field(u8, count) + __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon) + __dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(¶ms->chandef); - __entry->counter_offset_beacon = params->counter_offset_beacon; - __entry->counter_offset_presp = params->counter_offset_presp; __entry->radar_required = params->radar_required; __entry->block_tx = params->block_tx; __entry->count = params->count; + memcpy(__get_dynamic_array(bcn_ofs), + params->counter_offsets_beacon, + params->n_counter_offsets_beacon * sizeof(u16)); + + /* probe response offsets are optional */ + if (params->n_counter_offsets_presp) + memcpy(__get_dynamic_array(pres_ofs), + params->counter_offsets_presp, + params->n_counter_offsets_presp * sizeof(u16)); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT - ", block_tx: %d, count: %u, radar_required: %d" - ", counter offsets (beacon/presp): %u/%u", + ", block_tx: %d, count: %u, radar_required: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, - __entry->block_tx, __entry->count, __entry->radar_required, - __entry->counter_offset_beacon, - __entry->counter_offset_presp) + __entry->block_tx, __entry->count, __entry->radar_required) ); TRACE_EVENT(rdev_set_qos_map, @@ -1919,6 +1878,24 @@ TRACE_EVENT(rdev_set_qos_map, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des) ); +TRACE_EVENT(rdev_set_ap_chanwidth, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, netdev, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2072,7 +2049,8 @@ TRACE_EVENT(cfg80211_michael_mic_failure, MAC_ASSIGN(addr, addr); __entry->key_type = key_type; __entry->key_id = key_id; - memcpy(__entry->tsc, tsc, 6); + if (tsc) + memcpy(__entry->tsc, tsc, 6); ), TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT ", key type: %d, key id: %d, tsc: %pm", NETDEV_PR_ARG, MAC_PR_ARG(addr), __entry->key_type, @@ -2193,18 +2171,21 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, ); TRACE_EVENT(cfg80211_reg_can_beacon, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, chandef), + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype), + TP_ARGS(wiphy, chandef, iftype), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY + __field(enum nl80211_iftype, iftype) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->iftype = iftype; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype) ); TRACE_EVENT(cfg80211_chandef_dfs_required, @@ -2615,6 +2596,21 @@ TRACE_EVENT(cfg80211_ft_event, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap)) ); +TRACE_EVENT(cfg80211_stop_iface, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, + WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index e5872ff2c27c..728f1c0dc70d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -476,7 +476,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, EXPORT_SYMBOL(ieee80211_data_to_8023); int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, u8 *bssid, bool qos) + enum nl80211_iftype iftype, + const u8 *bssid, bool qos) { struct ieee80211_hdr hdr; u16 hdrlen, ethertype; @@ -770,7 +771,7 @@ EXPORT_SYMBOL(ieee80211_bss_get_ie); void cfg80211_upload_connect_keys(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct net_device *dev = wdev->netdev; int i; @@ -839,6 +840,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) __cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid, ev->ij.channel); break; + case EVENT_STOPPED: + __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); + break; } wdev_unlock(wdev); @@ -888,11 +892,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype && netif_running(dev)) { - err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, - ntype); - if (err) - return err; - dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); @@ -1268,6 +1267,120 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, return res; } +int cfg80211_iter_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES], + void (*iter)(const struct ieee80211_iface_combination *c, + void *data), + void *data) +{ + const struct ieee80211_regdomain *regdom; + enum nl80211_dfs_regions region = 0; + int i, j, iftype; + int num_interfaces = 0; + u32 used_iftypes = 0; + + if (radar_detect) { + rcu_read_lock(); + regdom = rcu_dereference(cfg80211_regdomain); + if (regdom) + region = regdom->dfs_region; + rcu_read_unlock(); + } + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + num_interfaces += iftype_num[iftype]; + if (iftype_num[iftype] > 0 && + !(wiphy->software_iftypes & BIT(iftype))) + used_iftypes |= BIT(iftype); + } + + for (i = 0; i < wiphy->n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct ieee80211_iface_limit *limits; + u32 all_iftypes = 0; + + c = &wiphy->iface_combinations[i]; + + if (num_interfaces > c->max_interfaces) + continue; + if (num_different_channels > c->num_different_channels) + continue; + + limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, + GFP_KERNEL); + if (!limits) + return -ENOMEM; + + for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { + if (wiphy->software_iftypes & BIT(iftype)) + continue; + for (j = 0; j < c->n_limits; j++) { + all_iftypes |= limits[j].types; + if (!(limits[j].types & BIT(iftype))) + continue; + if (limits[j].max < iftype_num[iftype]) + goto cont; + limits[j].max -= iftype_num[iftype]; + } + } + + if (radar_detect != (c->radar_detect_widths & radar_detect)) + goto cont; + + if (radar_detect && c->radar_detect_regions && + !(c->radar_detect_regions & BIT(region))) + goto cont; + + /* Finally check that all iftypes that we're currently + * using are actually part of this combination. If they + * aren't then we can't use this combination and have + * to continue to the next. + */ + if ((all_iftypes & used_iftypes) != used_iftypes) + goto cont; + + /* This combination covered all interface types and + * supported the requested numbers, so we're good. + */ + + (*iter)(c, data); + cont: + kfree(limits); + } + + return 0; +} +EXPORT_SYMBOL(cfg80211_iter_combinations); + +static void +cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c, + void *data) +{ + int *num = data; + (*num)++; +} + +int cfg80211_check_combinations(struct wiphy *wiphy, + const int num_different_channels, + const u8 radar_detect, + const int iftype_num[NUM_NL80211_IFTYPES]) +{ + int err, num = 0; + + err = cfg80211_iter_combinations(wiphy, num_different_channels, + radar_detect, iftype_num, + cfg80211_iter_sum_ifcombs, &num); + if (err) + return err; + if (num == 0) + return -EBUSY; + + return 0; +} +EXPORT_SYMBOL(cfg80211_check_combinations); + int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, @@ -1276,7 +1389,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, u8 radar_detect) { struct wireless_dev *wdev_iter; - u32 used_iftypes = BIT(iftype); int num[NUM_NL80211_IFTYPES]; struct ieee80211_channel *used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS]; @@ -1284,7 +1396,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; - int i, j; + int i; ASSERT_RTNL(); @@ -1306,6 +1418,11 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, num[iftype] = 1; + /* TODO: We'll probably not need this anymore, since this + * should only be called with CHAN_MODE_UNDEFINED. There are + * still a couple of pending calls where other chanmodes are + * used, but we should get rid of them. + */ switch (chanmode) { case CHAN_MODE_UNDEFINED: break; @@ -1369,65 +1486,13 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, num[wdev_iter->iftype]++; total++; - used_iftypes |= BIT(wdev_iter->iftype); } if (total == 1 && !radar_detect) return 0; - for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { - const struct ieee80211_iface_combination *c; - struct ieee80211_iface_limit *limits; - u32 all_iftypes = 0; - - c = &rdev->wiphy.iface_combinations[i]; - - if (total > c->max_interfaces) - continue; - if (num_different_channels > c->num_different_channels) - continue; - - limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, - GFP_KERNEL); - if (!limits) - return -ENOMEM; - - for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - if (rdev->wiphy.software_iftypes & BIT(iftype)) - continue; - for (j = 0; j < c->n_limits; j++) { - all_iftypes |= limits[j].types; - if (!(limits[j].types & BIT(iftype))) - continue; - if (limits[j].max < num[iftype]) - goto cont; - limits[j].max -= num[iftype]; - } - } - - if (radar_detect && !(c->radar_detect_widths & radar_detect)) - goto cont; - - /* - * Finally check that all iftypes that we're currently - * using are actually part of this combination. If they - * aren't then we can't use this combination and have - * to continue to the next. - */ - if ((all_iftypes & used_iftypes) != used_iftypes) - goto cont; - - /* - * This combination covered all interface types and - * supported the requested numbers, so we're good. - */ - kfree(limits); - return 0; - cont: - kfree(limits); - } - - return -EBUSY; + return cfg80211_check_combinations(&rdev->wiphy, num_different_channels, + radar_detect, num); } int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, @@ -1481,6 +1546,24 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy) } EXPORT_SYMBOL(ieee80211_get_num_supported_channels); +int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + wdev = dev->ieee80211_ptr; + if (!wdev) + return -EOPNOTSUPP; + + rdev = wiphy_to_rdev(wdev->wiphy); + if (!rdev->ops->get_station) + return -EOPNOTSUPP; + + return rdev_get_station(rdev, dev, mac_addr, sinfo); +} +EXPORT_SYMBOL(cfg80211_get_station); + /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ const unsigned char rfc1042_header[] __aligned(2) = diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 5661a54ac7ee..11120bb14162 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -73,7 +73,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct vif_params vifparams; enum nl80211_iftype type; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); switch (*mode) { case IW_MODE_INFRA: @@ -253,12 +253,12 @@ EXPORT_SYMBOL_GPL(cfg80211_wext_giwrange); /** * cfg80211_wext_freq - get wext frequency for non-"auto" - * @wiphy: the wiphy + * @dev: the net device * @freq: the wext freq encoding * * Returns a frequency, or a negative error code, or 0 for auto. */ -int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq) +int cfg80211_wext_freq(struct iw_freq *freq) { /* * Parse frequency - return 0 for auto and @@ -286,7 +286,7 @@ int cfg80211_wext_siwrts(struct net_device *dev, struct iw_param *rts, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 orts = wdev->wiphy->rts_threshold; int err; @@ -324,7 +324,7 @@ int cfg80211_wext_siwfrag(struct net_device *dev, struct iw_param *frag, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 ofrag = wdev->wiphy->frag_threshold; int err; @@ -364,7 +364,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev, struct iw_param *retry, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u32 changed = 0; u8 olong = wdev->wiphy->retry_long; u8 oshort = wdev->wiphy->retry_short; @@ -587,7 +587,7 @@ static int cfg80211_wext_siwencode(struct net_device *dev, struct iw_point *erq, char *keybuf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int idx, err; bool remove = false; struct key_params params; @@ -647,7 +647,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, struct iw_point *erq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; const u8 *addr; int idx; @@ -775,7 +775,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; @@ -787,7 +787,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_MONITOR: - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; if (freq == 0) @@ -798,7 +798,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, return -EINVAL; return cfg80211_set_monitor_channel(rdev, &chandef); case NL80211_IFTYPE_MESH_POINT: - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; if (freq == 0) @@ -818,7 +818,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, struct iw_freq *freq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_chan_def chandef; int ret; @@ -847,7 +847,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); enum nl80211_tx_power_setting type; int dbm = 0; @@ -899,7 +899,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err, val; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) @@ -1119,7 +1119,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev, struct iw_param *wrq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); bool ps = wdev->ps; int timeout = wdev->ps_timeout; int err; @@ -1177,7 +1177,7 @@ static int cfg80211_wds_wext_siwap(struct net_device *dev, struct sockaddr *addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int err; if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS)) @@ -1221,7 +1221,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev, struct iw_param *rate, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bitrate_mask mask; u32 fixed, maxrate; struct ieee80211_supported_band *sband; @@ -1272,7 +1272,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, struct iw_param *rate, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use a static struct */ static struct station_info sinfo; u8 addr[ETH_ALEN]; @@ -1310,7 +1310,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use static structs */ static struct iw_statistics wstats; static struct station_info sinfo; @@ -1449,7 +1449,7 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, struct iw_point *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmksa cfg_pmksa; struct iw_pmksa *pmksa = (struct iw_pmksa *)extra; diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h index 5d766b0118e8..ebcacca2f731 100644 --- a/net/wireless/wext-compat.h +++ b/net/wireless/wext-compat.h @@ -50,7 +50,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_point *data, char *extra); -int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); +int cfg80211_wext_freq(struct iw_freq *freq); extern const struct iw_handler_def cfg80211_wext_handler; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 86c331a65664..c7e5c8eb4f24 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -67,7 +67,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_channel *chan = NULL; int err, freq; @@ -75,7 +75,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; - freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; @@ -169,7 +169,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); size_t len = data->length; int err; @@ -260,7 +260,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *bssid = ap_addr->sa_data; int err; @@ -333,7 +333,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_point *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *ie = extra; int ie_len = data->length, err; @@ -390,7 +390,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, if (!wdev) return -EOPNOTSUPP; - rdev = wiphy_to_dev(wdev->wiphy); + rdev = wiphy_to_rdev(wdev->wiphy); if (wdev->iftype != NL80211_IFTYPE_STATION) return -EINVAL; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 3bb2cdc13b46..c51e8f7b8653 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -199,6 +199,7 @@ int xfrm_output(struct sk_buff *skb) return xfrm_output2(skb); } +EXPORT_SYMBOL_GPL(xfrm_output); int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { @@ -213,6 +214,7 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; return inner_mode->afinfo->extract_output(x, skb); } +EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); void xfrm_local_error(struct sk_buff *skb, int mtu) { @@ -233,7 +235,4 @@ void xfrm_local_error(struct sk_buff *skb, int mtu) afinfo->local_error(skb, mtu); xfrm_state_put_afinfo(afinfo); } - -EXPORT_SYMBOL_GPL(xfrm_output); -EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); EXPORT_SYMBOL_GPL(xfrm_local_error); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c08fbd11ceff..0525d78ba328 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -769,7 +769,7 @@ EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { int dir, err = 0; @@ -783,10 +783,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi continue; err = security_xfrm_policy_delete(pol->security); if (err) { - xfrm_audit_policy_delete(pol, 0, - audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 0, task_valid); return err; } } @@ -800,9 +797,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, - audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + task_valid); return err; } } @@ -812,19 +807,19 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi } #else static inline int -xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { return 0; } #endif -int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; write_lock_bh(&net->xfrm.xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(net, type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) goto out; @@ -841,9 +836,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; - xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -862,10 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; - xfrm_audit_policy_delete(pol, 1, - audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); write_lock_bh(&net->xfrm.xfrm_policy_lock); @@ -2107,6 +2097,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } + dst_hold(&xdst->u.dst); + xdst->u.dst.flags |= DST_NOCACHE; route = xdst->route; } } @@ -2783,21 +2775,19 @@ static struct notifier_block xfrm_dev_notifier = { static int __net_init xfrm_statistics_init(struct net *net) { int rv; - - if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, - sizeof(struct linux_xfrm_mib), - __alignof__(struct linux_xfrm_mib)) < 0) + net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); + if (!net->mib.xfrm_statistics) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) - snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); + free_percpu(net->mib.xfrm_statistics); return rv; } static void xfrm_statistics_fini(struct net *net) { xfrm_proc_fini(net); - snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); + free_percpu(net->mib.xfrm_statistics); } #else static int __net_init xfrm_statistics_init(struct net *net) @@ -2862,21 +2852,14 @@ out_byidx: static void xfrm_policy_fini(struct net *net) { - struct xfrm_audit audit_info; unsigned int sz; int dir; flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY - audit_info.loginuid = INVALID_UID; - audit_info.sessionid = (unsigned int)-1; - audit_info.secid = 0; - xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); #endif - audit_info.loginuid = INVALID_UID; - audit_info.sessionid = (unsigned int)-1; - audit_info.secid = 0; - xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); WARN_ON(!list_empty(&net->xfrm.policy_all)); @@ -2991,15 +2974,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } } -void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int sessionid, u32 secid) +void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -3007,14 +2989,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, unsigned int sessionid, u32 secid) + bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index fc5abd0b456f..9c4fbd8935f4 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -54,8 +54,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) int i; for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field((void __percpu **) - net->mib.xfrm_statistics, + snmp_fold_field(net->mib.xfrm_statistics, xfrm_mib_list[i].entry)); return 0; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8e9c781a6bba..0ab54134bb40 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -463,9 +463,7 @@ expired: if (!err) km_state_expired(x, 1, 0); - xfrm_audit_state_delete(x, err ? 0 : 1, - audit_get_loginuid(current), - audit_get_sessionid(current), 0); + xfrm_audit_state_delete(x, err ? 0 : 1, true); out: spin_unlock(&x->lock); @@ -562,7 +560,7 @@ EXPORT_SYMBOL(xfrm_state_delete); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { int i, err = 0; @@ -572,10 +570,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { - xfrm_audit_state_delete(x, 0, - audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + xfrm_audit_state_delete(x, 0, task_valid); return err; } } @@ -585,18 +580,18 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi } #else static inline int -xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { return 0; } #endif -int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) { int i, err = 0, cnt = 0; spin_lock_bh(&net->xfrm.xfrm_state_lock); - err = xfrm_state_flush_secctx_check(net, proto, audit_info); + err = xfrm_state_flush_secctx_check(net, proto, task_valid); if (err) goto out; @@ -612,9 +607,7 @@ restart: err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 0 : 1, - audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + task_valid); xfrm_state_put(x); if (!err) cnt++; @@ -2128,14 +2121,10 @@ out_bydst: void xfrm_state_fini(struct net *net) { - struct xfrm_audit audit_info; unsigned int sz; flush_work(&net->xfrm.state_hash_work); - audit_info.loginuid = INVALID_UID; - audit_info.sessionid = (unsigned int)-1; - audit_info.secid = 0; - xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); flush_work(&net->xfrm.state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); @@ -2198,30 +2187,28 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } } -void xfrm_audit_state_add(struct xfrm_state *x, int result, - kuid_t auid, unsigned int sessionid, u32 secid) +void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_state_add); -void xfrm_audit_state_delete(struct xfrm_state *x, int result, - kuid_t auid, unsigned int sessionid, u32 secid) +void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8f131c10a6f3..d4db6ebb089d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -177,9 +177,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP] || - attrs[XFRMA_TFCPAD] || - (ntohl(p->id.spi) >= 0x10000)) - + attrs[XFRMA_TFCPAD]) goto out; break; @@ -207,7 +205,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT] || - attrs[XFRMA_TFCPAD]) + attrs[XFRMA_TFCPAD] || + (ntohl(p->id.spi) >= 0x10000)) goto out; break; @@ -597,9 +596,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - u32 sid; err = verify_newsa_info(p, attrs); if (err) @@ -615,8 +611,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); - security_task_getsecid(current, &sid); - xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid); + xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -676,9 +671,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - u32 sid; x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) @@ -703,8 +695,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_notify(x, &c); out: - security_task_getsecid(current, &sid); - xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid); + xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; } @@ -955,6 +946,20 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, return skb; } +/* A wrapper for nlmsg_multicast() checking that nlsk is still available. + * Must be called with RCU read lock. + */ +static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, + u32 pid, unsigned int group) +{ + struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); + + if (nlsk) + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); + else + return -1; +} + static inline size_t xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) @@ -1414,9 +1419,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - u32 sid; err = verify_newpolicy_info(p); if (err) @@ -1435,8 +1437,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); - security_task_getsecid(current, &sid); - xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid); + xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) { security_xfrm_policy_free(xp->security); @@ -1673,13 +1674,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).portid); } } else { - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - u32 sid; - - security_task_getsecid(current, &sid); - xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid, - sid); + xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err != 0) goto out; @@ -1704,13 +1699,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct km_event c; struct xfrm_usersa_flush *p = nlmsg_data(nlh); - struct xfrm_audit audit_info; int err; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - security_task_getsecid(current, &audit_info.secid); - err = xfrm_state_flush(net, p->proto, &audit_info); + err = xfrm_state_flush(net, p->proto, true); if (err) { if (err == -ESRCH) /* empty table */ return 0; @@ -1894,16 +1885,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; - struct xfrm_audit audit_info; err = copy_from_user_policy_type(&type, attrs); if (err) return err; - audit_info.loginuid = audit_get_loginuid(current); - audit_info.sessionid = audit_get_sessionid(current); - security_task_getsecid(current, &audit_info.secid); - err = xfrm_policy_flush(net, type, &audit_info); + err = xfrm_policy_flush(net, type, true); if (err) { if (err == -ESRCH) /* empty table */ return 0; @@ -1969,14 +1956,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - u32 sid; - - security_task_getsecid(current, &sid); xfrm_policy_delete(xp, p->dir); - xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid); - + xfrm_audit_policy_delete(xp, 1, true); } else { // reset the timers here? WARN(1, "Dont know what to do with soft policy expire\n"); @@ -2012,13 +1993,8 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_expired(x, ue->hard, nlh->nlmsg_pid); if (ue->hard) { - kuid_t loginuid = audit_get_loginuid(current); - unsigned int sessionid = audit_get_sessionid(current); - u32 sid; - - security_task_getsecid(current, &sid); __xfrm_state_delete(x); - xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid); + xfrm_audit_state_delete(x, 1, true); } err = 0; out: @@ -2265,7 +2241,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); } #else static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, @@ -2377,7 +2353,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || @@ -2456,7 +2432,7 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) return -EMSGSIZE; } - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2471,7 +2447,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event if (build_aevent(skb, x, c) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS); } static int xfrm_notify_sa_flush(const struct km_event *c) @@ -2497,7 +2473,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); } static inline size_t xfrm_sa_len(struct xfrm_state *x) @@ -2584,7 +2560,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); out_free_skb: kfree_skb(skb); @@ -2675,7 +2651,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (build_acquire(skb, x, xt, xp) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2789,7 +2765,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2851,7 +2827,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e nlmsg_end(skb, nlh); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); out_free_skb: kfree_skb(skb); @@ -2879,7 +2855,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); out_free_skb: kfree_skb(skb); @@ -2948,7 +2924,7 @@ static int xfrm_send_report(struct net *net, u8 proto, if (build_report(skb, proto, sel, addr) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); } static inline size_t xfrm_mapping_msgsize(void) @@ -3000,7 +2976,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, if (build_mapping(skb, x, ipaddr, sport) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); } static bool xfrm_is_alive(const struct km_event *c) |
