summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.rst8
-rw-r--r--include/linux/sctp.h7
-rw-r--r--include/net/netns/sctp.h3
-rw-r--r--include/net/sctp/command.h1
-rw-r--r--include/net/sctp/constants.h20
-rw-r--r--include/net/sctp/sctp.h57
-rw-r--r--include/net/sctp/sm.h6
-rw-r--r--include/net/sctp/structs.h19
-rw-r--r--include/uapi/linux/sctp.h8
-rw-r--r--net/sctp/associola.c6
-rw-r--r--net/sctp/debug.c1
-rw-r--r--net/sctp/input.c132
-rw-r--r--net/sctp/ipv6.c112
-rw-r--r--net/sctp/output.c33
-rw-r--r--net/sctp/outqueue.c13
-rw-r--r--net/sctp/protocol.c21
-rw-r--r--net/sctp/sm_make_chunk.c31
-rw-r--r--net/sctp/sm_sideeffect.c37
-rw-r--r--net/sctp/sm_statefuns.c37
-rw-r--r--net/sctp/sm_statetable.c43
-rw-r--r--net/sctp/socket.c123
-rw-r--r--net/sctp/sysctl.c35
-rw-r--r--net/sctp/transport.c153
23 files changed, 779 insertions, 127 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index b0436d3a4f11..8bff728b3a1e 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2834,6 +2834,14 @@ encap_port - INTEGER
Default: 0
+plpmtud_probe_interval - INTEGER
+ The time interval (in milliseconds) for sending PLPMTUD probe chunks.
+ These chunks are sent at the specified interval with a variable size
+ to probe the mtu of a given path between 2 endpoints. PLPMTUD will
+ be disabled when 0 is set, and other values for it must be >= 5000.
+
+ Default: 0
+
``/proc/sys/net/core/*``
========================
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index bb1926589693..a86e852507b3 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -98,6 +98,7 @@ enum sctp_cid {
SCTP_CID_I_FWD_TSN = 0xC2,
SCTP_CID_ASCONF_ACK = 0x80,
SCTP_CID_RECONF = 0x82,
+ SCTP_CID_PAD = 0x84,
}; /* enum */
@@ -410,6 +411,12 @@ struct sctp_heartbeat_chunk {
};
+/* PAD chunk could be bundled with heartbeat chunk to probe pmtu */
+struct sctp_pad_chunk {
+ struct sctp_chunkhdr uh;
+};
+
+
/* For the abort and shutdown ACK we must carry the init tag in the
* common header. Just the common header is all that is needed with a
* chunk descriptor.
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index a0f315effa94..40240722cdca 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -84,6 +84,9 @@ struct netns_sctp {
/* HB.interval - 30 seconds */
unsigned int hb_interval;
+ /* The interval for PLPMTUD probe timer */
+ unsigned int probe_interval;
+
/* Association.Max.Retrans - 10 attempts
* Path.Max.Retrans - 5 attempts (per destination address)
* Max.Init.Retransmits - 8 attempts
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 5e848884ff61..2058fabffbf6 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -59,6 +59,7 @@ enum sctp_verb {
SCTP_CMD_HB_TIMERS_START, /* Start the heartbeat timers. */
SCTP_CMD_HB_TIMER_UPDATE, /* Update a heartbeat timers. */
SCTP_CMD_HB_TIMERS_STOP, /* Stop the heartbeat timers. */
+ SCTP_CMD_PROBE_TIMER_UPDATE, /* Update a probe timer. */
SCTP_CMD_TRANSPORT_HB_SENT, /* Reset the status of a transport. */
SCTP_CMD_TRANSPORT_IDLE, /* Do manipulations on idle transport */
SCTP_CMD_TRANSPORT_ON, /* Mark the transport as active. */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 14a0d22c9113..265fffa33dad 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -77,6 +77,7 @@ enum sctp_event_timeout {
SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD,
SCTP_EVENT_TIMEOUT_HEARTBEAT,
SCTP_EVENT_TIMEOUT_RECONF,
+ SCTP_EVENT_TIMEOUT_PROBE,
SCTP_EVENT_TIMEOUT_SACK,
SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
@@ -200,6 +201,23 @@ enum sctp_sock_state {
SCTP_SS_CLOSING = TCP_CLOSE_WAIT,
};
+enum sctp_plpmtud_state {
+ SCTP_PL_DISABLED,
+ SCTP_PL_BASE,
+ SCTP_PL_SEARCH,
+ SCTP_PL_COMPLETE,
+ SCTP_PL_ERROR,
+};
+
+#define SCTP_BASE_PLPMTU 1200
+#define SCTP_MAX_PLPMTU 9000
+#define SCTP_MIN_PLPMTU 512
+
+#define SCTP_MAX_PROBES 3
+
+#define SCTP_PL_BIG_STEP 32
+#define SCTP_PL_MIN_STEP 4
+
/* These functions map various type to printable names. */
const char *sctp_cname(const union sctp_subtype id); /* chunk types */
const char *sctp_oname(const union sctp_subtype id); /* other events */
@@ -424,4 +442,6 @@ enum {
*/
#define SCTP_AUTH_RANDOM_LENGTH 32
+#define SCTP_PROBE_TIMER_MIN 5000
+
#endif /* __sctp_constants_h__ */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 86f74f2fe6de..69bab88ad66b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -145,6 +145,8 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
struct sctphdr *, struct sctp_association **,
struct sctp_transport **);
void sctp_err_finish(struct sock *, struct sctp_transport *);
+int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb);
+int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb);
void sctp_icmp_frag_needed(struct sock *, struct sctp_association *,
struct sctp_transport *t, __u32 pmtu);
void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
@@ -573,14 +575,15 @@ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *
/* Calculate max payload size given a MTU, or the total overhead if
* given MTU is zero
*/
-static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
- __u32 mtu, __u32 extra)
+static inline __u32 __sctp_mtu_payload(const struct sctp_sock *sp,
+ const struct sctp_transport *t,
+ __u32 mtu, __u32 extra)
{
__u32 overhead = sizeof(struct sctphdr) + extra;
if (sp) {
overhead += sp->pf->af->net_header_len;
- if (sp->udp_port)
+ if (sp->udp_port && (!t || t->encap_port))
overhead += sizeof(struct udphdr);
} else {
overhead += sizeof(struct ipv6hdr);
@@ -592,6 +595,12 @@ static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
return mtu ? mtu - overhead : overhead;
}
+static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
+ __u32 mtu, __u32 extra)
+{
+ return __sctp_mtu_payload(sp, NULL, mtu, extra);
+}
+
static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
{
return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst),
@@ -615,6 +624,48 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize)
return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize);
}
+static inline int sctp_transport_pl_hlen(struct sctp_transport *t)
+{
+ return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0);
+}
+
+static inline void sctp_transport_pl_reset(struct sctp_transport *t)
+{
+ if (t->probe_interval && (t->param_flags & SPP_PMTUD_ENABLE) &&
+ (t->state == SCTP_ACTIVE || t->state == SCTP_UNKNOWN)) {
+ if (t->pl.state == SCTP_PL_DISABLED) {
+ t->pl.state = SCTP_PL_BASE;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ sctp_transport_reset_probe_timer(t);
+ }
+ } else {
+ if (t->pl.state != SCTP_PL_DISABLED) {
+ if (del_timer(&t->probe_timer))
+ sctp_transport_put(t);
+ t->pl.state = SCTP_PL_DISABLED;
+ }
+ }
+}
+
+static inline void sctp_transport_pl_update(struct sctp_transport *t)
+{
+ if (t->pl.state == SCTP_PL_DISABLED)
+ return;
+
+ if (del_timer(&t->probe_timer))
+ sctp_transport_put(t);
+
+ t->pl.state = SCTP_PL_BASE;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+}
+
+static inline bool sctp_transport_pl_enabled(struct sctp_transport *t)
+{
+ return t->pl.state != SCTP_PL_DISABLED;
+}
+
static inline bool sctp_newsk_ready(const struct sock *sk)
{
return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index fd223c94589a..2eb6d7c2c931 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -151,6 +151,7 @@ sctp_state_fn_t sctp_sf_cookie_wait_icmp_abort;
/* Prototypes for timeout event state functions. */
sctp_state_fn_t sctp_sf_do_6_3_3_rtx;
sctp_state_fn_t sctp_sf_send_reconf;
+sctp_state_fn_t sctp_sf_send_probe;
sctp_state_fn_t sctp_sf_do_6_2_sack;
sctp_state_fn_t sctp_sf_autoclose_timer_expire;
@@ -225,11 +226,13 @@ struct sctp_chunk *sctp_make_new_encap_port(
const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
- const struct sctp_transport *transport);
+ const struct sctp_transport *transport,
+ __u32 probe_size);
struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
const void *payload,
const size_t paylen);
+struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len);
struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
__be16 cause_code, const void *payload,
@@ -310,6 +313,7 @@ int sctp_do_sm(struct net *net, enum sctp_event_type event_type,
void sctp_generate_t3_rtx_event(struct timer_list *t);
void sctp_generate_heartbeat_event(struct timer_list *t);
void sctp_generate_reconf_event(struct timer_list *t);
+void sctp_generate_probe_event(struct timer_list *t);
void sctp_generate_proto_unreach_event(struct timer_list *t);
void sctp_ootb_pkt_free(struct sctp_packet *packet);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 1aa585216f34..9eaa701cda23 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -177,6 +177,7 @@ struct sctp_sock {
* will be inherited by all new associations.
*/
__u32 hbinterval;
+ __u32 probe_interval;
__be16 udp_port;
__be16 encap_port;
@@ -385,6 +386,7 @@ struct sctp_sender_hb_info {
union sctp_addr daddr;
unsigned long sent_at;
__u64 hb_nonce;
+ __u32 probe_size;
};
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
@@ -656,6 +658,7 @@ struct sctp_chunk {
data_accepted:1, /* At least 1 chunk accepted */
auth:1, /* IN: was auth'ed | OUT: needs auth */
has_asconf:1, /* IN: have seen an asconf before */
+ pmtu_probe:1, /* Used by PLPMTUD, can be set in s HB chunk */
tsn_missing_report:2, /* Data chunk missing counter. */
fast_retransmit:2; /* Is this chunk fast retransmitted? */
};
@@ -858,6 +861,7 @@ struct sctp_transport {
* the destination address every heartbeat interval.
*/
unsigned long hbinterval;
+ unsigned long probe_interval;
/* SACK delay timeout */
unsigned long sackdelay;
@@ -934,6 +938,9 @@ struct sctp_transport {
/* Timer to handler reconf chunk rtx */
struct timer_list reconf_timer;
+ /* Timer to send a probe HB packet for PLPMTUD */
+ struct timer_list probe_timer;
+
/* Since we're using per-destination retransmission timers
* (see above), we're also using per-destination "transmitted"
* queues. This probably ought to be a private struct
@@ -976,6 +983,14 @@ struct sctp_transport {
char cacc_saw_newack;
} cacc;
+ struct {
+ __u16 pmtu;
+ __u16 probe_size;
+ __u16 probe_high;
+ __u8 probe_count;
+ __u8 state;
+ } pl; /* plpmtud related */
+
/* 64-bit random number sent with heartbeat. */
__u64 hb_nonce;
@@ -993,6 +1008,7 @@ void sctp_transport_free(struct sctp_transport *);
void sctp_transport_reset_t3_rtx(struct sctp_transport *);
void sctp_transport_reset_hb_timer(struct sctp_transport *);
void sctp_transport_reset_reconf_timer(struct sctp_transport *transport);
+void sctp_transport_reset_probe_timer(struct sctp_transport *transport);
int sctp_transport_hold(struct sctp_transport *);
void sctp_transport_put(struct sctp_transport *);
void sctp_transport_update_rto(struct sctp_transport *, __u32);
@@ -1007,6 +1023,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
void sctp_transport_immediate_rtx(struct sctp_transport *);
void sctp_transport_dst_release(struct sctp_transport *t);
void sctp_transport_dst_confirm(struct sctp_transport *t);
+void sctp_transport_pl_send(struct sctp_transport *t);
+void sctp_transport_pl_recv(struct sctp_transport *t);
/* This is the structure we use to queue packets as they come into
@@ -1795,6 +1813,7 @@ struct sctp_association {
* will be inherited by all new transports.
*/
unsigned long hbinterval;
+ unsigned long probe_interval;
__be16 encap_port;
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index cb78e7a739da..c4ff1ebd8bcc 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -141,6 +141,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE 131
#define SCTP_EXPOSE_PF_STATE SCTP_EXPOSE_POTENTIALLY_FAILED_STATE
#define SCTP_REMOTE_UDP_ENCAPS_PORT 132
+#define SCTP_PLPMTUD_PROBE_INTERVAL 133
/* PR-SCTP policies */
#define SCTP_PR_SCTP_NONE 0x0000
@@ -1213,4 +1214,11 @@ enum sctp_sched_type {
SCTP_SS_MAX = SCTP_SS_RR
};
+/* Probe Interval socket option */
+struct sctp_probeinterval {
+ sctp_assoc_t spi_assoc_id;
+ struct sockaddr_storage spi_address;
+ __u32 spi_interval;
+};
+
#endif /* _UAPI_SCTP_H */
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 336df4b36655..be29da09cc7a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -98,6 +98,7 @@ static struct sctp_association *sctp_association_init(
* sock configured value.
*/
asoc->hbinterval = msecs_to_jiffies(sp->hbinterval);
+ asoc->probe_interval = msecs_to_jiffies(sp->probe_interval);
asoc->encap_port = sp->encap_port;
@@ -625,6 +626,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
* association configured value.
*/
peer->hbinterval = asoc->hbinterval;
+ peer->probe_interval = asoc->probe_interval;
peer->encap_port = asoc->encap_port;
@@ -714,6 +716,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
return NULL;
}
+ sctp_transport_pl_reset(peer);
+
/* Attach the remote transport to our asoc. */
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
@@ -812,6 +816,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
spc_state = SCTP_ADDR_CONFIRMED;
transport->state = SCTP_ACTIVE;
+ sctp_transport_pl_reset(transport);
break;
case SCTP_TRANSPORT_DOWN:
@@ -821,6 +826,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
*/
if (transport->state != SCTP_UNCONFIRMED) {
transport->state = SCTP_INACTIVE;
+ sctp_transport_pl_reset(transport);
spc_state = SCTP_ADDR_UNREACHABLE;
} else {
sctp_transport_dst_release(transport);
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index c4d9c7feffb9..ccd773e4c371 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -154,6 +154,7 @@ static const char *const sctp_timer_tbl[] = {
"TIMEOUT_T5_SHUTDOWN_GUARD",
"TIMEOUT_HEARTBEAT",
"TIMEOUT_RECONF",
+ "TIMEOUT_PROBE",
"TIMEOUT_SACK",
"TIMEOUT_AUTOCLOSE",
};
diff --git a/net/sctp/input.c b/net/sctp/input.c
index d508f6f3dd08..fe6429cc012f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -385,7 +385,9 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
struct sctp_transport *t, __u32 pmtu)
{
- if (!t || (t->pathmtu <= pmtu))
+ if (!t ||
+ (t->pathmtu <= pmtu &&
+ t->pl.probe_size + sctp_transport_pl_hlen(t) <= pmtu))
return;
if (sock_owned_by_user(sk)) {
@@ -554,6 +556,49 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
sctp_transport_put(t);
}
+static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
+ __u8 type, __u8 code, __u32 info)
+{
+ struct sctp_association *asoc = t->asoc;
+ struct sock *sk = asoc->base.sk;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_PARAMETERPROB:
+ err = EPROTO;
+ break;
+ case ICMP_DEST_UNREACH:
+ if (code > NR_ICMP_UNREACH)
+ return;
+ if (code == ICMP_FRAG_NEEDED) {
+ sctp_icmp_frag_needed(sk, asoc, t, SCTP_TRUNC4(info));
+ return;
+ }
+ if (code == ICMP_PROT_UNREACH) {
+ sctp_icmp_proto_unreachable(sk, asoc, t);
+ return;
+ }
+ err = icmp_err_convert[code].errno;
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code == ICMP_EXC_FRAGTIME)
+ return;
+
+ err = EHOSTUNREACH;
+ break;
+ case ICMP_REDIRECT:
+ sctp_icmp_redirect(sk, t, skb);
+ default:
+ return;
+ }
+ if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) {
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ } else { /* Only an error on timeout */
+ sk->sk_err_soft = err;
+ }
+}
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -572,22 +617,19 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
int sctp_v4_err(struct sk_buff *skb, __u32 info)
{
const struct iphdr *iph = (const struct iphdr *)skb->data;
- const int ihlen = iph->ihl * 4;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- struct sock *sk;
- struct sctp_association *asoc = NULL;
+ struct net *net = dev_net(skb->dev);
struct sctp_transport *transport;
- struct inet_sock *inet;
+ struct sctp_association *asoc;
__u16 saveip, savesctp;
- int err;
- struct net *net = dev_net(skb->dev);
+ struct sock *sk;
/* Fix up skb to look at the embedded net header. */
saveip = skb->network_header;
savesctp = skb->transport_header;
skb_reset_network_header(skb);
- skb_set_transport_header(skb, ihlen);
+ skb_set_transport_header(skb, iph->ihl * 4);
sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back, the original values. */
skb->network_header = saveip;
@@ -596,59 +638,41 @@ int sctp_v4_err(struct sk_buff *skb, __u32 info)
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return -ENOENT;
}
- /* Warning: The sock lock is held. Remember to call
- * sctp_err_finish!
- */
- switch (type) {
- case ICMP_PARAMETERPROB:
- err = EPROTO;
- break;
- case ICMP_DEST_UNREACH:
- if (code > NR_ICMP_UNREACH)
- goto out_unlock;
+ sctp_v4_err_handle(transport, skb, type, code, info);
+ sctp_err_finish(sk, transport);
- /* PMTU discovery (RFC1191) */
- if (ICMP_FRAG_NEEDED == code) {
- sctp_icmp_frag_needed(sk, asoc, transport,
- SCTP_TRUNC4(info));
- goto out_unlock;
- } else {
- if (ICMP_PROT_UNREACH == code) {
- sctp_icmp_proto_unreachable(sk, asoc,
- transport);
- goto out_unlock;
- }
- }
- err = icmp_err_convert[code].errno;
- break;
- case ICMP_TIME_EXCEEDED:
- /* Ignore any time exceeded errors due to fragment reassembly
- * timeouts.
- */
- if (ICMP_EXC_FRAGTIME == code)
- goto out_unlock;
+ return 0;
+}
- err = EHOSTUNREACH;
- break;
- case ICMP_REDIRECT:
- sctp_icmp_redirect(sk, transport, skb);
- /* Fall through to out_unlock. */
- default:
- goto out_unlock;
+int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ struct icmphdr *hdr;
+ __u32 info = 0;
+
+ skb->transport_header += sizeof(struct udphdr);
+ sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &t);
+ if (!sk) {
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+ return -ENOENT;
}
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
- sk->sk_err = err;
- sk->sk_error_report(sk);
- } else { /* Only an error on timeout */
- sk->sk_err_soft = err;
+ skb->transport_header -= sizeof(struct udphdr);
+ hdr = (struct icmphdr *)(skb_network_header(skb) - sizeof(struct icmphdr));
+ if (hdr->type == ICMP_REDIRECT) {
+ /* can't be handled without outer iphdr known, leave it to udp_err */
+ sctp_err_finish(sk, t);
+ return 0;
}
+ if (hdr->type == ICMP_DEST_UNREACH && hdr->code == ICMP_FRAG_NEEDED)
+ info = ntohs(hdr->un.frag.mtu);
+ sctp_v4_err_handle(t, skb, hdr->type, hdr->code, info);
-out_unlock:
- sctp_err_finish(sk, transport);
- return 0;
+ sctp_err_finish(sk, t);
+ return 1;
}
/*
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index bd08807c9e44..05f81a4d0ee7 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,54 +122,28 @@ static struct notifier_block sctp_inet6addr_notifier = {
.notifier_call = sctp_inet6addr_event,
};
-/* ICMP error handler. */
-static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
+ __u8 type, __u8 code, __u32 info)
{
- struct inet6_dev *idev;
- struct sock *sk;
- struct sctp_association *asoc;
- struct sctp_transport *transport;
+ struct sctp_association *asoc = t->asoc;
+ struct sock *sk = asoc->base.sk;
struct ipv6_pinfo *np;
- __u16 saveip, savesctp;
- int err, ret = 0;
- struct net *net = dev_net(skb->dev);
-
- idev = in6_dev_get(skb->dev);
-
- /* Fix up skb to look at the embedded net header. */
- saveip = skb->network_header;
- savesctp = skb->transport_header;
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, offset);
- sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
- /* Put back, the original pointers. */
- skb->network_header = saveip;
- skb->transport_header = savesctp;
- if (!sk) {
- __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS);
- ret = -ENOENT;
- goto out;
- }
-
- /* Warning: The sock lock is held. Remember to call
- * sctp_err_finish!
- */
+ int err = 0;
switch (type) {
case ICMPV6_PKT_TOOBIG:
if (ip6_sk_accept_pmtu(sk))
- sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info));
- goto out_unlock;
+ sctp_icmp_frag_needed(sk, asoc, t, info);
+ return;
case ICMPV6_PARAMPROB:
if (ICMPV6_UNK_NEXTHDR == code) {
- sctp_icmp_proto_unreachable(sk, asoc, transport);
- goto out_unlock;
+ sctp_icmp_proto_unreachable(sk, asoc, t);
+ return;
}
break;
case NDISC_REDIRECT:
- sctp_icmp_redirect(sk, transport, skb);
- goto out_unlock;
+ sctp_icmp_redirect(sk, t, skb);
+ return;
default:
break;
}
@@ -179,17 +153,69 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sock_owned_by_user(sk) && np->recverr) {
sk->sk_err = err;
sk->sk_error_report(sk);
- } else { /* Only an error on timeout */
+ } else {
sk->sk_err_soft = err;
}
+}
+
+/* ICMP error handler. */
+static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_transport *transport;
+ struct sctp_association *asoc;
+ __u16 saveip, savesctp;
+ struct sock *sk;
+
+ /* Fix up skb to look at the embedded net header. */
+ saveip = skb->network_header;
+ savesctp = skb->transport_header;
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, offset);
+ sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
+ /* Put back, the original pointers. */
+ skb->network_header = saveip;
+ skb->transport_header = savesctp;
+ if (!sk) {
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ return -ENOENT;
+ }
-out_unlock:
+ sctp_v6_err_handle(transport, skb, type, code, ntohl(info));
sctp_err_finish(sk, transport);
-out:
- if (likely(idev != NULL))
- in6_dev_put(idev);
- return ret;
+ return 0;
+}
+
+int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ struct icmp6hdr *hdr;
+ __u32 info = 0;
+
+ skb->transport_header += sizeof(struct udphdr);
+ sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &t);
+ if (!sk) {
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ return -ENOENT;
+ }
+
+ skb->transport_header -= sizeof(struct udphdr);
+ hdr = (struct icmp6hdr *)(skb_network_header(skb) - sizeof(struct icmp6hdr));
+ if (hdr->icmp6_type == NDISC_REDIRECT) {
+ /* can't be handled without outer ip6hdr known, leave it to udpv6_err */
+ sctp_err_finish(sk, t);
+ return 0;
+ }
+ if (hdr->icmp6_type == ICMPV6_PKT_TOOBIG)
+ info = ntohl(hdr->icmp6_mtu);
+ sctp_v6_err_handle(t, skb, hdr->icmp6_type, hdr->icmp6_code, info);
+
+ sctp_err_finish(sk, t);
+ return 1;
}
static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
diff --git a/net/sctp/output.c b/net/sctp/output.c
index a6aa17df09ef..9032ce60d50e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -103,7 +103,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
sctp_transport_route(tp, NULL, sp);
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
- } else if (!sctp_transport_pmtu_check(tp)) {
+ } else if (!sctp_transport_pl_enabled(tp) &&
+ !sctp_transport_pmtu_check(tp)) {
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
}
@@ -211,6 +212,30 @@ enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
return retval;
}
+/* Try to bundle a pad chunk into a packet with a heartbeat chunk for PLPMTUTD probe */
+static enum sctp_xmit sctp_packet_bundle_pad(struct sctp_packet *pkt, struct sctp_chunk *chunk)
+{
+ struct sctp_transport *t = pkt->transport;
+ struct sctp_chunk *pad;
+ int overhead = 0;
+
+ if (!chunk->pmtu_probe)
+ return SCTP_XMIT_OK;
+
+ /* calculate the Padding Data size for the pad chunk */
+ overhead += sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ overhead += sizeof(struct sctp_sender_hb_info) + sizeof(struct sctp_pad_chunk);
+ pad = sctp_make_pad(t->asoc, t->pl.probe_size - overhead);
+ if (!pad)
+ return SCTP_XMIT_DELAY;
+
+ list_add_tail(&pad->list, &pkt->chunk_list);
+ pkt->size += SCTP_PAD4(ntohs(pad->chunk_hdr->length));
+ chunk->transport = t;
+
+ return SCTP_XMIT_OK;
+}
+
/* Try to bundle an auth chunk into the packet. */
static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
struct sctp_chunk *chunk)
@@ -382,6 +407,10 @@ enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
goto finish;
retval = __sctp_packet_append_chunk(packet, chunk);
+ if (retval != SCTP_XMIT_OK)
+ goto finish;
+
+ retval = sctp_packet_bundle_pad(packet, chunk);
finish:
return retval;
@@ -553,7 +582,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sk = chunk->skb->sk;
/* check gso */
- if (packet->size > tp->pathmtu && !packet->ipfragok) {
+ if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) {
if (!sk_can_gso(sk)) {
pr_err_once("Trying to GSO but underlying device doesn't support it.");
goto out;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 5cb1aa5f067b..ff47091c385e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -769,7 +769,11 @@ static int sctp_packet_singleton(struct sctp_transport *transport,
sctp_packet_init(&singleton, transport, sport, dport);
sctp_packet_config(&singleton, vtag, 0);
- sctp_packet_append_chunk(&singleton, chunk);
+ if (sctp_packet_append_chunk(&singleton, chunk) != SCTP_XMIT_OK) {
+ list_del_init(&chunk->list);
+ sctp_chunk_free(chunk);
+ return -ENOMEM;
+ }
return sctp_packet_transmit(&singleton, gfp);
}
@@ -929,8 +933,13 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
one_packet = 1;
fallthrough;
- case SCTP_CID_SACK:
case SCTP_CID_HEARTBEAT:
+ if (chunk->pmtu_probe) {
+ sctp_packet_singleton(ctx->transport, chunk, ctx->gfp);
+ break;
+ }
+ fallthrough;
+ case SCTP_CID_SACK:
case SCTP_CID_SHUTDOWN:
case SCTP_CID_ECN_ECNE:
case SCTP_CID_ASCONF:
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index baa4e770e4ba..bc5db0b404ce 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -850,23 +850,6 @@ static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static int sctp_udp_err_lookup(struct sock *sk, struct sk_buff *skb)
-{
- struct sctp_association *asoc;
- struct sctp_transport *t;
- int family;
-
- skb->transport_header += sizeof(struct udphdr);
- family = (ip_hdr(skb)->version == 4) ? AF_INET : AF_INET6;
- sk = sctp_err_lookup(dev_net(skb->dev), family, skb, sctp_hdr(skb),
- &asoc, &t);
- if (!sk)
- return -ENOENT;
-
- sctp_err_finish(sk, t);
- return 0;
-}
-
int sctp_udp_sock_start(struct net *net)
{
struct udp_tunnel_sock_cfg tuncfg = {NULL};
@@ -885,7 +868,7 @@ int sctp_udp_sock_start(struct net *net)
tuncfg.encap_type = 1;
tuncfg.encap_rcv = sctp_udp_rcv;
- tuncfg.encap_err_lookup = sctp_udp_err_lookup;
+ tuncfg.encap_err_lookup = sctp_udp_v4_err;
setup_udp_tunnel_sock(net, sock, &tuncfg);
net->sctp.udp4_sock = sock->sk;
@@ -907,7 +890,7 @@ int sctp_udp_sock_start(struct net *net)
tuncfg.encap_type = 1;
tuncfg.encap_rcv = sctp_udp_rcv;
- tuncfg.encap_err_lookup = sctp_udp_err_lookup;
+ tuncfg.encap_err_lookup = sctp_udp_v6_err;
setup_udp_tunnel_sock(net, sock, &tuncfg);
net->sctp.udp6_sock = sock->sk;
#endif
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5b44d228b6ca..b0eaa93a9cc6 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1160,7 +1160,8 @@ nodata:
/* Make a HEARTBEAT chunk. */
struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
- const struct sctp_transport *transport)
+ const struct sctp_transport *transport,
+ __u32 probe_size)
{
struct sctp_sender_hb_info hbinfo;
struct sctp_chunk *retval;
@@ -1176,6 +1177,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
hbinfo.daddr = transport->ipaddr;
hbinfo.sent_at = jiffies;
hbinfo.hb_nonce = transport->hb_nonce;
+ hbinfo.probe_size = probe_size;
/* Cast away the 'const', as this is just telling the chunk
* what transport it belongs to.
@@ -1183,6 +1185,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
retval->transport = (struct sctp_transport *) transport;
retval->subh.hbs_hdr = sctp_addto_chunk(retval, sizeof(hbinfo),
&hbinfo);
+ retval->pmtu_probe = !!probe_size;
nodata:
return retval;
@@ -1218,6 +1221,32 @@ nodata:
return retval;
}
+/* RFC4820 3. Padding Chunk (PAD)
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Type = 0x84 | Flags=0 | Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * \ Padding Data /
+ * / \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len)
+{
+ struct sctp_chunk *retval;
+
+ retval = sctp_make_control(asoc, SCTP_CID_PAD, 0, len, GFP_ATOMIC);
+ if (!retval)
+ return NULL;
+
+ skb_put_zero(retval->skb, len);
+ retval->chunk_hdr->length = htons(ntohs(retval->chunk_hdr->length) + len);
+ retval->chunk_end = skb_tail_pointer(retval->skb);
+
+ return retval;
+}
+
/* Create an Operation Error chunk with the specified space reserved.
* This routine can be used for containing multiple causes in the chunk.
*/
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index ce15d590a615..b3815b568e8e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -471,6 +471,38 @@ out_unlock:
sctp_transport_put(transport);
}
+/* Handle the timeout of the probe timer. */
+void sctp_generate_probe_event(struct timer_list *t)
+{
+ struct sctp_transport *transport = from_timer(transport, t, probe_timer);
+ struct sctp_association *asoc = transport->asoc;
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+ int error = 0;
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ pr_debug("%s: sock is busy\n", __func__);
+
+ /* Try again later. */
+ if (!mod_timer(&transport->probe_timer, jiffies + (HZ / 20)))
+ sctp_transport_hold(transport);
+ goto out_unlock;
+ }
+
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
+ SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_PROBE),
+ asoc->state, asoc->ep, asoc,
+ transport, GFP_ATOMIC);
+
+ if (error)
+ sk->sk_err = -error;
+
+out_unlock:
+ bh_unlock_sock(sk);
+ sctp_transport_put(transport);
+}
+
/* Inject a SACK Timeout event into the state machine. */
static void sctp_generate_sack_event(struct timer_list *t)
{
@@ -1641,6 +1673,11 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
sctp_cmd_hb_timers_stop(commands, asoc);
break;
+ case SCTP_CMD_PROBE_TIMER_UPDATE:
+ t = cmd->obj.transport;
+ sctp_transport_reset_probe_timer(t);
+ break;
+
case SCTP_CMD_REPORT_ERROR:
error = cmd->obj.error;
break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4f30388a0dd0..d29b579da904 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1004,7 +1004,7 @@ static enum sctp_disposition sctp_sf_heartbeat(
struct sctp_chunk *reply;
/* Send a heartbeat to our peer. */
- reply = sctp_make_heartbeat(asoc, transport);
+ reply = sctp_make_heartbeat(asoc, transport, 0);
if (!reply)
return SCTP_DISPOSITION_NOMEM;
@@ -1095,6 +1095,32 @@ enum sctp_disposition sctp_sf_send_reconf(struct net *net,
return SCTP_DISPOSITION_CONSUME;
}
+/* send hb chunk with padding for PLPMUTD. */
+enum sctp_disposition sctp_sf_send_probe(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_transport *transport = (struct sctp_transport *)arg;
+ struct sctp_chunk *reply;
+
+ if (!sctp_transport_pl_enabled(transport))
+ return SCTP_DISPOSITION_CONSUME;
+
+ sctp_transport_pl_send(transport);
+
+ reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+ if (!reply)
+ return SCTP_DISPOSITION_NOMEM;
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+ sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
+ SCTP_TRANSPORT(transport));
+
+ return SCTP_DISPOSITION_CONSUME;
+}
+
/*
* Process an heartbeat request.
*
@@ -1243,6 +1269,15 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
if (hbinfo->hb_nonce != link->hb_nonce)
return SCTP_DISPOSITION_DISCARD;
+ if (hbinfo->probe_size) {
+ if (hbinfo->probe_size != link->pl.probe_size ||
+ !sctp_transport_pl_enabled(link))
+ return SCTP_DISPOSITION_DISCARD;
+
+ sctp_transport_pl_recv(link);
+ return SCTP_DISPOSITION_CONSUME;
+ }
+
max_interval = link->hbinterval + link->rto;
/* Check if the timestamp looks valid. */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 88ea87f4f0e7..1816a4410b2b 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -527,6 +527,26 @@ auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
}; /*state_fn_t auth_chunk_event_table[][] */
static const struct sctp_sm_table_entry
+pad_chunk_event_table[SCTP_STATE_NUM_STATES] = {
+ /* SCTP_STATE_CLOSED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_COOKIE_WAIT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_COOKIE_ECHOED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_ESTABLISHED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_PENDING */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_SENT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_RECEIVED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_ACK_SENT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+}; /* chunk pad */
+
+static const struct sctp_sm_table_entry
chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
/* SCTP_STATE_CLOSED */
TYPE_SCTP_FUNC(sctp_sf_ootb),
@@ -947,6 +967,25 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
+#define TYPE_SCTP_EVENT_TIMEOUT_PROBE { \
+ /* SCTP_STATE_CLOSED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_COOKIE_WAIT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_COOKIE_ECHOED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_ESTABLISHED */ \
+ TYPE_SCTP_FUNC(sctp_sf_send_probe), \
+ /* SCTP_STATE_SHUTDOWN_PENDING */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_SENT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+}
+
static const struct sctp_sm_table_entry
timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_NONE,
@@ -958,6 +997,7 @@ timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD,
TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT,
TYPE_SCTP_EVENT_TIMEOUT_RECONF,
+ TYPE_SCTP_EVENT_TIMEOUT_PROBE,
TYPE_SCTP_EVENT_TIMEOUT_SACK,
TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
@@ -992,6 +1032,9 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
case SCTP_CID_AUTH:
return &auth_chunk_event_table[0][state];
+
+ case SCTP_CID_PAD:
+ return &pad_chunk_event_table[state];
}
return &chunk_event_table_unknown[state];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a79d193ff872..e64e01f61b11 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2496,6 +2496,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
sctp_transport_pmtu(trans, sctp_opt2sk(sp));
sctp_assoc_sync_pmtu(asoc);
}
+ sctp_transport_pl_reset(trans);
} else if (asoc) {
asoc->param_flags =
(asoc->param_flags & ~SPP_PMTUD) | pmtud_change;
@@ -4481,6 +4482,61 @@ static int sctp_setsockopt_encap_port(struct sock *sk,
return 0;
}
+static int sctp_setsockopt_probe_interval(struct sock *sk,
+ struct sctp_probeinterval *params,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ __u32 probe_interval;
+
+ if (optlen != sizeof(*params))
+ return -EINVAL;
+
+ probe_interval = params->spi_interval;
+ if (probe_interval && probe_interval < SCTP_PROBE_TIMER_MIN)
+ return -EINVAL;
+
+ /* If an address other than INADDR_ANY is specified, and
+ * no transport is found, then the request is invalid.
+ */
+ if (!sctp_is_any(sk, (union sctp_addr *)&params->spi_address)) {
+ t = sctp_addr_id2transport(sk, &params->spi_address,
+ params->spi_assoc_id);
+ if (!t)
+ return -EINVAL;
+
+ t->probe_interval = msecs_to_jiffies(probe_interval);
+ sctp_transport_pl_reset(t);
+ return 0;
+ }
+
+ /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+ * socket is a one to many style socket, and an association
+ * was not found, then the id was invalid.
+ */
+ asoc = sctp_id2assoc(sk, params->spi_assoc_id);
+ if (!asoc && params->spi_assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP))
+ return -EINVAL;
+
+ /* If changes are for association, also apply probe_interval to
+ * each transport.
+ */
+ if (asoc) {
+ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
+ t->probe_interval = msecs_to_jiffies(probe_interval);
+ sctp_transport_pl_reset(t);
+ }
+
+ asoc->probe_interval = msecs_to_jiffies(probe_interval);
+ return 0;
+ }
+
+ sctp_sk(sk)->probe_interval = probe_interval;
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4703,6 +4759,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_REMOTE_UDP_ENCAPS_PORT:
retval = sctp_setsockopt_encap_port(sk, kopt, optlen);
break;
+ case SCTP_PLPMTUD_PROBE_INTERVAL:
+ retval = sctp_setsockopt_probe_interval(sk, kopt, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -4989,6 +5048,7 @@ static int sctp_init_sock(struct sock *sk)
atomic_set(&sp->pd_mode, 0);
skb_queue_head_init(&sp->pd_lobby);
sp->frag_interleave = 0;
+ sp->probe_interval = net->sctp.probe_interval;
/* Create a per socket endpoint structure. Even if we
* change the data structure relationships, this may still
@@ -7905,6 +7965,66 @@ out:
return 0;
}
+static int sctp_getsockopt_probe_interval(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_probeinterval params;
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ __u32 probe_interval;
+
+ if (len < sizeof(params))
+ return -EINVAL;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ return -EFAULT;
+
+ /* If an address other than INADDR_ANY is specified, and
+ * no transport is found, then the request is invalid.
+ */
+ if (!sctp_is_any(sk, (union sctp_addr *)&params.spi_address)) {
+ t = sctp_addr_id2transport(sk, &params.spi_address,
+ params.spi_assoc_id);
+ if (!t) {
+ pr_debug("%s: failed no transport\n", __func__);
+ return -EINVAL;
+ }
+
+ probe_interval = jiffies_to_msecs(t->probe_interval);
+ goto out;
+ }
+
+ /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+ * socket is a one to many style socket, and an association
+ * was not found, then the id was invalid.
+ */
+ asoc = sctp_id2assoc(sk, params.spi_assoc_id);
+ if (!asoc && params.spi_assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP)) {
+ pr_debug("%s: failed no association\n", __func__);
+ return -EINVAL;
+ }
+
+ if (asoc) {
+ probe_interval = jiffies_to_msecs(asoc->probe_interval);
+ goto out;
+ }
+
+ probe_interval = sctp_sk(sk)->probe_interval;
+
+out:
+ params.spi_interval = probe_interval;
+ if (copy_to_user(optval, &params, len))
+ return -EFAULT;
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ return 0;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -8128,6 +8248,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_REMOTE_UDP_ENCAPS_PORT:
retval = sctp_getsockopt_encap_port(sk, len, optval, optlen);
break;
+ case SCTP_PLPMTUD_PROBE_INTERVAL:
+ retval = sctp_getsockopt_probe_interval(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 55871b277f47..b46a416787ec 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -55,6 +55,8 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
+static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table sctp_table[] = {
{
@@ -294,6 +296,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "plpmtud_probe_interval",
+ .data = &init_net.sctp.probe_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_sctp_do_probe_interval,
+ },
+ {
.procname = "udp_port",
.data = &init_net.sctp.udp_port,
.maxlen = sizeof(int),
@@ -539,6 +548,32 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
return ret;
}
+static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct ctl_table tbl;
+ int ret, new_value;
+
+ memset(&tbl, 0, sizeof(struct ctl_table));
+ tbl.maxlen = sizeof(unsigned int);
+
+ if (write)
+ tbl.data = &new_value;
+ else
+ tbl.data = &net->sctp.probe_interval;
+
+ ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
+ if (new_value && new_value < SCTP_PROBE_TIMER_MIN)
+ return -EINVAL;
+
+ net->sctp.probe_interval = new_value;
+ }
+
+ return ret;
+}
+
int sctp_sysctl_net_register(struct net *net)
{
struct ctl_table *table;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index bf0ac467e757..f27b856ea8ce 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -75,6 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
+ timer_setup(&peer->probe_timer, sctp_generate_probe_event, 0);
timer_setup(&peer->proto_unreach_timer,
sctp_generate_proto_unreach_event, 0);
@@ -131,6 +132,9 @@ void sctp_transport_free(struct sctp_transport *transport)
if (del_timer(&transport->reconf_timer))
sctp_transport_put(transport);
+ if (del_timer(&transport->probe_timer))
+ sctp_transport_put(transport);
+
/* Delete the ICMP proto unreachable timer if it's active. */
if (del_timer(&transport->proto_unreach_timer))
sctp_transport_put(transport);
@@ -207,6 +211,20 @@ void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
sctp_transport_hold(transport);
}
+void sctp_transport_reset_probe_timer(struct sctp_transport *transport)
+{
+ int scale = 1;
+
+ if (timer_pending(&transport->probe_timer))
+ return;
+ if (transport->pl.state == SCTP_PL_COMPLETE &&
+ transport->pl.probe_count == 1)
+ scale = 30; /* works as PMTU_RAISE_TIMER */
+ if (!mod_timer(&transport->probe_timer,
+ jiffies + transport->probe_interval * scale))
+ sctp_transport_hold(transport);
+}
+
/* This transport has been assigned to an association.
* Initialize fields from the association or from the sock itself.
* Register the reference count in the association.
@@ -241,12 +259,141 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = sctp_dst_mtu(transport->dst);
else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
+
+ sctp_transport_pl_update(transport);
+}
+
+void sctp_transport_pl_send(struct sctp_transport *t)
+{
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+ if (t->pl.probe_count < SCTP_MAX_PROBES) {
+ t->pl.probe_count++;
+ return;
+ }
+
+ if (t->pl.state == SCTP_PL_BASE) {
+ if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
+ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+ t->pl.pmtu = SCTP_MIN_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
+ t->pl.state = SCTP_PL_BASE; /* Search -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_high = 0;
+
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ } else { /* Normal probe failure. */
+ t->pl.probe_high = t->pl.probe_size;
+ t->pl.probe_size = t->pl.pmtu;
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
+ t->pl.state = SCTP_PL_BASE; /* Search Complete -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ }
+ t->pl.probe_count = 1;
+}
+
+void sctp_transport_pl_recv(struct sctp_transport *t)
+{
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+ t->pl.pmtu = t->pl.probe_size;
+ t->pl.probe_count = 0;
+ if (t->pl.state == SCTP_PL_BASE) {
+ t->pl.state = SCTP_PL_SEARCH; /* Base -> Search */
+ t->pl.probe_size += SCTP_PL_BIG_STEP;
+ } else if (t->pl.state == SCTP_PL_ERROR) {
+ t->pl.state = SCTP_PL_SEARCH; /* Error -> Search */
+
+ t->pl.pmtu = t->pl.probe_size;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ t->pl.probe_size += SCTP_PL_BIG_STEP;
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (!t->pl.probe_high) {
+ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+ SCTP_MAX_PLPMTU);
+ return;
+ }
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
+ if (t->pl.probe_size >= t->pl.probe_high) {
+ t->pl.probe_high = 0;
+ t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */
+
+ t->pl.probe_size = t->pl.pmtu;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
+ }
+}
+
+static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
+{
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, pmtu);
+
+ if (pmtu < SCTP_MIN_PLPMTU || pmtu >= t->pl.probe_size)
+ return false;
+
+ if (t->pl.state == SCTP_PL_BASE) {
+ if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) {
+ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+ t->pl.pmtu = SCTP_MIN_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ }
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+ t->pl.state = SCTP_PL_BASE; /* Search -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_count = 0;
+
+ t->pl.probe_high = 0;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ } else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) {
+ t->pl.probe_size = pmtu;
+ t->pl.probe_count = 0;
+
+ return false;
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+ t->pl.state = SCTP_PL_BASE; /* Complete -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_count = 0;
+
+ t->pl.probe_high = 0;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ }
+ }
+
+ return true;
}
bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
- struct dst_entry *dst = sctp_transport_dst_check(t);
struct sock *sk = t->asoc->base.sk;
+ struct dst_entry *dst;
bool change = true;
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
@@ -257,6 +404,10 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
}
pmtu = SCTP_TRUNC4(pmtu);
+ if (sctp_transport_pl_enabled(t))
+ return sctp_transport_pl_toobig(t, pmtu - sctp_transport_pl_hlen(t));
+
+ dst = sctp_transport_dst_check(t);
if (dst) {
struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family);
union sctp_addr addr;