diff options
Diffstat (limited to 'net/tipc/socket.c')
| -rw-r--r-- | net/tipc/socket.c | 1271 | 
1 files changed, 1036 insertions, 235 deletions
diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ff8c8118d56e..4731cad99d1c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,17 +35,67 @@   */  #include "core.h" -#include "port.h"  #include "name_table.h"  #include "node.h"  #include "link.h"  #include <linux/export.h> +#include "config.h" +#include "socket.h"  #define SS_LISTENING	-1	/* socket is listening */  #define SS_READY	-2	/* socket is connectionless */ -#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */ -#define TIPC_FWD_MSG	        1 +#define CONN_TIMEOUT_DEFAULT  8000	/* default connect timeout = 8s */ +#define CONN_PROBING_INTERVAL 3600000	/* [ms] => 1 h */ +#define TIPC_FWD_MSG	      1 +#define TIPC_CONN_OK          0 +#define TIPC_CONN_PROBING     1 + +/** + * struct tipc_sock - TIPC socket structure + * @sk: socket - interacts with 'port' and with user via the socket API + * @connected: non-zero if port is currently connected to a peer port + * @conn_type: TIPC type used when connection was established + * @conn_instance: TIPC instance used when connection was established + * @published: non-zero if port has one or more associated names + * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @ref: unique reference to port in TIPC object registry + * @phdr: preformatted message header used when sending messages + * @port_list: adjacent ports in TIPC's global list of ports + * @publications: list of publications for port + * @pub_count: total # of publications port has made during its lifetime + * @probing_state: + * @probing_interval: + * @timer: + * @port: port - interacts with 'sk' and with the rest of the TIPC stack + * @peer_name: the peer of the connection, if any + * @conn_timeout: the time we can wait for an unresponded setup request + * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @link_cong: non-zero if owner must sleep because of link congestion + * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @rcv_unacked: # messages read by user, but not yet acked back to peer + */ +struct tipc_sock { +	struct sock sk; +	int connected; +	u32 conn_type; +	u32 conn_instance; +	int published; +	u32 max_pkt; +	u32 ref; +	struct tipc_msg phdr; +	struct list_head sock_list; +	struct list_head publications; +	u32 pub_count; +	u32 probing_state; +	u32 probing_interval; +	struct timer_list timer; +	uint conn_timeout; +	atomic_t dupl_rcvcnt; +	bool link_cong; +	uint sent_unacked; +	uint rcv_unacked; +};  static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);  static void tipc_data_ready(struct sock *sk); @@ -53,6 +103,16 @@ static void tipc_write_space(struct sock *sk);  static int tipc_release(struct socket *sock);  static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);  static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); +static void tipc_sk_timeout(unsigned long ref); +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, +			   struct tipc_name_seq const *seq); +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, +			    struct tipc_name_seq const *seq); +static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk); +static void tipc_sk_ref_discard(u32 ref); +static struct tipc_sock *tipc_sk_get(u32 ref); +static struct tipc_sock *tipc_sk_get_next(u32 *ref); +static void tipc_sk_put(struct tipc_sock *tsk);  static const struct proto_ops packet_ops;  static const struct proto_ops stream_ops; @@ -61,6 +121,14 @@ static const struct proto_ops msg_ops;  static struct proto tipc_proto;  static struct proto tipc_proto_kern; +static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { +	[TIPC_NLA_SOCK_UNSPEC]		= { .type = NLA_UNSPEC }, +	[TIPC_NLA_SOCK_ADDR]		= { .type = NLA_U32 }, +	[TIPC_NLA_SOCK_REF]		= { .type = NLA_U32 }, +	[TIPC_NLA_SOCK_CON]		= { .type = NLA_NESTED }, +	[TIPC_NLA_SOCK_HAS_PUBL]	= { .type = NLA_FLAG } +}; +  /*   * Revised TIPC socket locking policy:   * @@ -106,34 +174,117 @@ static struct proto tipc_proto_kern;   *   - port reference   */ -#include "socket.h" +static u32 tsk_peer_node(struct tipc_sock *tsk) +{ +	return msg_destnode(&tsk->phdr); +} + +static u32 tsk_peer_port(struct tipc_sock *tsk) +{ +	return msg_destport(&tsk->phdr); +} + +static  bool tsk_unreliable(struct tipc_sock *tsk) +{ +	return msg_src_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable) +{ +	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0); +} + +static bool tsk_unreturnable(struct tipc_sock *tsk) +{ +	return msg_dest_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable) +{ +	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0); +} + +static int tsk_importance(struct tipc_sock *tsk) +{ +	return msg_importance(&tsk->phdr); +} + +static int tsk_set_importance(struct tipc_sock *tsk, int imp) +{ +	if (imp > TIPC_CRITICAL_IMPORTANCE) +		return -EINVAL; +	msg_set_importance(&tsk->phdr, (u32)imp); +	return 0; +} + +static struct tipc_sock *tipc_sk(const struct sock *sk) +{ +	return container_of(sk, struct tipc_sock, sk); +} + +static int tsk_conn_cong(struct tipc_sock *tsk) +{ +	return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; +}  /** - * advance_rx_queue - discard first buffer in socket receive queue + * tsk_advance_rx_queue - discard first buffer in socket receive queue   *   * Caller must hold socket lock   */ -static void advance_rx_queue(struct sock *sk) +static void tsk_advance_rx_queue(struct sock *sk)  {  	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));  }  /** - * reject_rx_queue - reject all buffers in socket receive queue + * tsk_rej_rx_queue - reject all buffers in socket receive queue   *   * Caller must hold socket lock   */ -static void reject_rx_queue(struct sock *sk) +static void tsk_rej_rx_queue(struct sock *sk)  { -	struct sk_buff *buf; +	struct sk_buff *skb;  	u32 dnode; -	while ((buf = __skb_dequeue(&sk->sk_receive_queue))) { -		if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) -			tipc_link_xmit(buf, dnode, 0); +	while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { +		if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) +			tipc_link_xmit_skb(skb, dnode, 0);  	}  } +/* tsk_peer_msg - verify if message was sent by connected port's peer + * + * Handles cases where the node's network address has changed from + * the default of <0.0.0> to its configured setting. + */ +static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) +{ +	u32 peer_port = tsk_peer_port(tsk); +	u32 orig_node; +	u32 peer_node; + +	if (unlikely(!tsk->connected)) +		return false; + +	if (unlikely(msg_origport(msg) != peer_port)) +		return false; + +	orig_node = msg_orignode(msg); +	peer_node = tsk_peer_node(tsk); + +	if (likely(orig_node == peer_node)) +		return true; + +	if (!orig_node && (peer_node == tipc_own_addr)) +		return true; + +	if (!peer_node && (orig_node == tipc_own_addr)) +		return true; + +	return false; +} +  /**   * tipc_sk_create - create a TIPC socket   * @net: network namespace (must be default network) @@ -153,7 +304,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,  	socket_state state;  	struct sock *sk;  	struct tipc_sock *tsk; -	struct tipc_port *port; +	struct tipc_msg *msg;  	u32 ref;  	/* Validate arguments */ @@ -188,20 +339,24 @@ static int tipc_sk_create(struct net *net, struct socket *sock,  		return -ENOMEM;  	tsk = tipc_sk(sk); -	port = &tsk->port; - -	ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE); +	ref = tipc_sk_ref_acquire(tsk);  	if (!ref) { -		pr_warn("Socket registration failed, ref. table exhausted\n"); -		sk_free(sk); +		pr_warn("Socket create failed; reference table exhausted\n");  		return -ENOMEM;  	} +	tsk->max_pkt = MAX_PKT_DEFAULT; +	tsk->ref = ref; +	INIT_LIST_HEAD(&tsk->publications); +	msg = &tsk->phdr; +	tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, +		      NAMED_H_SIZE, 0); +	msg_set_origport(msg, ref);  	/* Finish initializing socket data structures */  	sock->ops = ops;  	sock->state = state; -  	sock_init_data(sock, sk); +	k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref);  	sk->sk_backlog_rcv = tipc_backlog_rcv;  	sk->sk_rcvbuf = sysctl_tipc_rmem[1];  	sk->sk_data_ready = tipc_data_ready; @@ -209,12 +364,11 @@ static int tipc_sk_create(struct net *net, struct socket *sock,  	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;  	tsk->sent_unacked = 0;  	atomic_set(&tsk->dupl_rcvcnt, 0); -	tipc_port_unlock(port);  	if (sock->state == SS_READY) { -		tipc_port_set_unreturnable(port, true); +		tsk_set_unreturnable(tsk, true);  		if (sock->type == SOCK_DGRAM) -			tipc_port_set_unreliable(port, true); +			tsk_set_unreliable(tsk, true);  	}  	return 0;  } @@ -308,8 +462,7 @@ static int tipc_release(struct socket *sock)  {  	struct sock *sk = sock->sk;  	struct tipc_sock *tsk; -	struct tipc_port *port; -	struct sk_buff *buf; +	struct sk_buff *skb;  	u32 dnode;  	/* @@ -320,34 +473,44 @@ static int tipc_release(struct socket *sock)  		return 0;  	tsk = tipc_sk(sk); -	port = &tsk->port;  	lock_sock(sk);  	/*  	 * Reject all unreceived messages, except on an active connection  	 * (which disconnects locally & sends a 'FIN+' to peer)  	 */ +	dnode = tsk_peer_node(tsk);  	while (sock->state != SS_DISCONNECTING) { -		buf = __skb_dequeue(&sk->sk_receive_queue); -		if (buf == NULL) +		skb = __skb_dequeue(&sk->sk_receive_queue); +		if (skb == NULL)  			break; -		if (TIPC_SKB_CB(buf)->handle != NULL) -			kfree_skb(buf); +		if (TIPC_SKB_CB(skb)->handle != NULL) +			kfree_skb(skb);  		else {  			if ((sock->state == SS_CONNECTING) ||  			    (sock->state == SS_CONNECTED)) {  				sock->state = SS_DISCONNECTING; -				tipc_port_disconnect(port->ref); +				tsk->connected = 0; +				tipc_node_remove_conn(dnode, tsk->ref);  			} -			if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) -				tipc_link_xmit(buf, dnode, 0); +			if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) +				tipc_link_xmit_skb(skb, dnode, 0);  		}  	} -	/* Destroy TIPC port; also disconnects an active connection and -	 * sends a 'FIN-' to peer. -	 */ -	tipc_port_destroy(port); +	tipc_sk_withdraw(tsk, 0, NULL); +	tipc_sk_ref_discard(tsk->ref); +	k_cancel_timer(&tsk->timer); +	if (tsk->connected) { +		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, +				      SHORT_H_SIZE, 0, dnode, tipc_own_addr, +				      tsk_peer_port(tsk), +				      tsk->ref, TIPC_ERR_NO_PORT); +		if (skb) +			tipc_link_xmit_skb(skb, dnode, tsk->ref); +		tipc_node_remove_conn(dnode, tsk->ref); +	} +	k_term_timer(&tsk->timer);  	/* Discard any remaining (connection-based) messages in receive queue */  	__skb_queue_purge(&sk->sk_receive_queue); @@ -355,7 +518,6 @@ static int tipc_release(struct socket *sock)  	/* Reject any messages that accumulated in backlog queue */  	sock->state = SS_DISCONNECTING;  	release_sock(sk); -  	sock_put(sk);  	sock->sk = NULL; @@ -387,7 +549,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,  	lock_sock(sk);  	if (unlikely(!uaddr_len)) { -		res = tipc_withdraw(&tsk->port, 0, NULL); +		res = tipc_sk_withdraw(tsk, 0, NULL);  		goto exit;  	} @@ -415,8 +577,8 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,  	}  	res = (addr->scope > 0) ? -		tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) : -		tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq); +		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : +		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);  exit:  	release_sock(sk);  	return res; @@ -446,10 +608,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,  		if ((sock->state != SS_CONNECTED) &&  			((peer != 2) || (sock->state != SS_DISCONNECTING)))  			return -ENOTCONN; -		addr->addr.id.ref = tipc_port_peerport(&tsk->port); -		addr->addr.id.node = tipc_port_peernode(&tsk->port); +		addr->addr.id.ref = tsk_peer_port(tsk); +		addr->addr.id.node = tsk_peer_node(tsk);  	} else { -		addr->addr.id.ref = tsk->port.ref; +		addr->addr.id.ref = tsk->ref;  		addr->addr.id.node = tipc_own_addr;  	} @@ -518,7 +680,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,  		break;  	case SS_READY:  	case SS_CONNECTED: -		if (!tsk->link_cong && !tipc_sk_conn_cong(tsk)) +		if (!tsk->link_cong && !tsk_conn_cong(tsk))  			mask |= POLLOUT;  		/* fall thru' */  	case SS_CONNECTING: @@ -538,7 +700,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,   * tipc_sendmcast - send multicast message   * @sock: socket structure   * @seq: destination address - * @iov: message data to send + * @msg: message to send   * @dsz: total length of message data   * @timeo: timeout to wait for wakeup   * @@ -546,11 +708,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,   * Returns the number of bytes sent on success, or errno   */  static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq, -			  struct iovec *iov, size_t dsz, long timeo) +			  struct msghdr *msg, size_t dsz, long timeo)  {  	struct sock *sk = sock->sk; -	struct tipc_msg *mhdr = &tipc_sk(sk)->port.phdr; -	struct sk_buff *buf; +	struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; +	struct sk_buff_head head;  	uint mtu;  	int rc; @@ -565,12 +727,13 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,  new_mtu:  	mtu = tipc_bclink_get_mtu(); -	rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf); +	__skb_queue_head_init(&head); +	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head);  	if (unlikely(rc < 0))  		return rc;  	do { -		rc = tipc_bclink_xmit(buf); +		rc = tipc_bclink_xmit(&head);  		if (likely(rc >= 0)) {  			rc = dsz;  			break; @@ -579,9 +742,10 @@ new_mtu:  			goto new_mtu;  		if (rc != -ELINKCONG)  			break; +		tipc_sk(sk)->link_cong = 1;  		rc = tipc_wait_for_sndmsg(sock, &timeo);  		if (rc) -			kfree_skb_list(buf); +			__skb_queue_purge(&head);  	} while (!rc);  	return rc;  } @@ -638,20 +802,19 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode,  			     struct sk_buff *buf)  {  	struct tipc_msg *msg = buf_msg(buf); -	struct tipc_port *port = &tsk->port;  	int conn_cong;  	/* Ignore if connection cannot be validated: */ -	if (!port->connected || !tipc_port_peer_msg(port, msg)) +	if (!tsk_peer_msg(tsk, msg))  		goto exit; -	port->probing_state = TIPC_CONN_OK; +	tsk->probing_state = TIPC_CONN_OK;  	if (msg_type(msg) == CONN_ACK) { -		conn_cong = tipc_sk_conn_cong(tsk); +		conn_cong = tsk_conn_cong(tsk);  		tsk->sent_unacked -= msg_msgcnt(msg);  		if (conn_cong) -			tipc_sock_wakeup(tsk); +			tsk->sk.sk_write_space(&tsk->sk);  	} else if (msg_type(msg) == CONN_PROBE) {  		if (!tipc_msg_reverse(buf, dnode, TIPC_OK))  			return TIPC_OK; @@ -664,39 +827,6 @@ exit:  	return TIPC_OK;  } -/** - * dest_name_check - verify user is permitted to send to specified port name - * @dest: destination address - * @m: descriptor for message to be sent - * - * Prevents restricted configuration commands from being issued by - * unauthorized users. - * - * Returns 0 if permission is granted, otherwise errno - */ -static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) -{ -	struct tipc_cfg_msg_hdr hdr; - -	if (unlikely(dest->addrtype == TIPC_ADDR_ID)) -		return 0; -	if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES)) -		return 0; -	if (likely(dest->addr.name.name.type == TIPC_TOP_SRV)) -		return 0; -	if (likely(dest->addr.name.name.type != TIPC_CFG_SRV)) -		return -EACCES; - -	if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr))) -		return -EMSGSIZE; -	if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) -		return -EFAULT; -	if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN))) -		return -EACCES; - -	return 0; -} -  static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)  {  	struct sock *sk = sock->sk; @@ -742,15 +872,14 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,  	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);  	struct sock *sk = sock->sk;  	struct tipc_sock *tsk = tipc_sk(sk); -	struct tipc_port *port = &tsk->port; -	struct tipc_msg *mhdr = &port->phdr; -	struct iovec *iov = m->msg_iov; +	struct tipc_msg *mhdr = &tsk->phdr;  	u32 dnode, dport; -	struct sk_buff *buf; +	struct sk_buff_head head; +	struct sk_buff *skb;  	struct tipc_name_seq *seq = &dest->addr.nameseq;  	u32 mtu;  	long timeo; -	int rc = -EINVAL; +	int rc;  	if (unlikely(!dest))  		return -EDESTADDRREQ; @@ -774,23 +903,20 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,  			rc = -EISCONN;  			goto exit;  		} -		if (tsk->port.published) { +		if (tsk->published) {  			rc = -EOPNOTSUPP;  			goto exit;  		}  		if (dest->addrtype == TIPC_ADDR_NAME) { -			tsk->port.conn_type = dest->addr.name.name.type; -			tsk->port.conn_instance = dest->addr.name.name.instance; +			tsk->conn_type = dest->addr.name.name.type; +			tsk->conn_instance = dest->addr.name.name.instance;  		}  	} -	rc = dest_name_check(dest, m); -	if (rc) -		goto exit;  	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);  	if (dest->addrtype == TIPC_ADDR_MCAST) { -		rc = tipc_sendmcast(sock, seq, iov, dsz, timeo); +		rc = tipc_sendmcast(sock, seq, m, dsz, timeo);  		goto exit;  	} else if (dest->addrtype == TIPC_ADDR_NAME) {  		u32 type = dest->addr.name.name.type; @@ -820,13 +946,16 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,  	}  new_mtu: -	mtu = tipc_node_get_mtu(dnode, tsk->port.ref); -	rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf); +	mtu = tipc_node_get_mtu(dnode, tsk->ref); +	__skb_queue_head_init(&head); +	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head);  	if (rc < 0)  		goto exit;  	do { -		rc = tipc_link_xmit(buf, dnode, tsk->port.ref); +		skb = skb_peek(&head); +		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; +		rc = tipc_link_xmit(&head, dnode, tsk->ref);  		if (likely(rc >= 0)) {  			if (sock->state != SS_READY)  				sock->state = SS_CONNECTING; @@ -835,13 +964,12 @@ new_mtu:  		}  		if (rc == -EMSGSIZE)  			goto new_mtu; -  		if (rc != -ELINKCONG)  			break; - +		tsk->link_cong = 1;  		rc = tipc_wait_for_sndmsg(sock, &timeo);  		if (rc) -			kfree_skb_list(buf); +			__skb_queue_purge(&head);  	} while (!rc);  exit:  	if (iocb) @@ -873,8 +1001,8 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)  		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);  		done = sk_wait_event(sk, timeo_p,  				     (!tsk->link_cong && -				      !tipc_sk_conn_cong(tsk)) || -				     !tsk->port.connected); +				      !tsk_conn_cong(tsk)) || +				     !tsk->connected);  		finish_wait(sk_sleep(sk), &wait);  	} while (!done);  	return 0; @@ -897,11 +1025,10 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,  {  	struct sock *sk = sock->sk;  	struct tipc_sock *tsk = tipc_sk(sk); -	struct tipc_port *port = &tsk->port; -	struct tipc_msg *mhdr = &port->phdr; -	struct sk_buff *buf; +	struct tipc_msg *mhdr = &tsk->phdr; +	struct sk_buff_head head;  	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); -	u32 ref = port->ref; +	u32 ref = tsk->ref;  	int rc = -EINVAL;  	long timeo;  	u32 dnode; @@ -929,17 +1056,18 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,  	}  	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); -	dnode = tipc_port_peernode(port); +	dnode = tsk_peer_node(tsk);  next: -	mtu = port->max_pkt; +	mtu = tsk->max_pkt;  	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); -	rc = tipc_msg_build(mhdr, m->msg_iov, sent, send, mtu, &buf); +	__skb_queue_head_init(&head); +	rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head);  	if (unlikely(rc < 0))  		goto exit;  	do { -		if (likely(!tipc_sk_conn_cong(tsk))) { -			rc = tipc_link_xmit(buf, dnode, ref); +		if (likely(!tsk_conn_cong(tsk))) { +			rc = tipc_link_xmit(&head, dnode, ref);  			if (likely(!rc)) {  				tsk->sent_unacked++;  				sent += send; @@ -948,15 +1076,16 @@ next:  				goto next;  			}  			if (rc == -EMSGSIZE) { -				port->max_pkt = tipc_node_get_mtu(dnode, ref); +				tsk->max_pkt = tipc_node_get_mtu(dnode, ref);  				goto next;  			}  			if (rc != -ELINKCONG)  				break; +			tsk->link_cong = 1;  		}  		rc = tipc_wait_for_sndpkt(sock, &timeo);  		if (rc) -			kfree_skb_list(buf); +			__skb_queue_purge(&head);  	} while (!rc);  exit:  	if (iocb) @@ -984,29 +1113,25 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,  	return tipc_send_stream(iocb, sock, m, dsz);  } -/** - * auto_connect - complete connection setup to a remote port - * @tsk: tipc socket structure - * @msg: peer's response message - * - * Returns 0 on success, errno otherwise +/* tipc_sk_finish_conn - complete the setup of a connection   */ -static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg) +static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, +				u32 peer_node)  { -	struct tipc_port *port = &tsk->port; -	struct socket *sock = tsk->sk.sk_socket; -	struct tipc_portid peer; - -	peer.ref = msg_origport(msg); -	peer.node = msg_orignode(msg); - -	__tipc_port_connect(port->ref, port, &peer); - -	if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE) -		return -EINVAL; -	msg_set_importance(&port->phdr, (u32)msg_importance(msg)); -	sock->state = SS_CONNECTED; -	return 0; +	struct tipc_msg *msg = &tsk->phdr; + +	msg_set_destnode(msg, peer_node); +	msg_set_destport(msg, peer_port); +	msg_set_type(msg, TIPC_CONN_MSG); +	msg_set_lookup_scope(msg, 0); +	msg_set_hdr_sz(msg, SHORT_H_SIZE); + +	tsk->probing_interval = CONN_PROBING_INTERVAL; +	tsk->probing_state = TIPC_CONN_OK; +	tsk->connected = 1; +	k_start_timer(&tsk->timer, tsk->probing_interval); +	tipc_node_add_conn(peer_node, tsk->ref, peer_port); +	tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref);  }  /** @@ -1033,17 +1158,17 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)  }  /** - * anc_data_recv - optionally capture ancillary data for received message + * tipc_sk_anc_data_recv - optionally capture ancillary data for received message   * @m: descriptor for message info   * @msg: received message header - * @tport: TIPC port associated with message + * @tsk: TIPC port associated with message   *   * Note: Ancillary data is not captured if not requested by receiver.   *   * Returns 0 if successful, otherwise errno   */ -static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, -			 struct tipc_port *tport) +static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, +				 struct tipc_sock *tsk)  {  	u32 anc_data[3];  	u32 err; @@ -1086,10 +1211,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,  		anc_data[2] = msg_nameupper(msg);  		break;  	case TIPC_CONN_MSG: -		has_name = (tport->conn_type != 0); -		anc_data[0] = tport->conn_type; -		anc_data[1] = tport->conn_instance; -		anc_data[2] = tport->conn_instance; +		has_name = (tsk->conn_type != 0); +		anc_data[0] = tsk->conn_type; +		anc_data[1] = tsk->conn_instance; +		anc_data[2] = tsk->conn_instance;  		break;  	default:  		has_name = 0; @@ -1103,6 +1228,24 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,  	return 0;  } +static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) +{ +	struct sk_buff *skb = NULL; +	struct tipc_msg *msg; +	u32 peer_port = tsk_peer_port(tsk); +	u32 dnode = tsk_peer_node(tsk); + +	if (!tsk->connected) +		return; +	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, +			      tipc_own_addr, peer_port, tsk->ref, TIPC_OK); +	if (!skb) +		return; +	msg = buf_msg(skb); +	msg_set_msgcnt(msg, ack); +	tipc_link_xmit_skb(skb, dnode, msg_link_selector(msg)); +} +  static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)  {  	struct sock *sk = sock->sk; @@ -1153,7 +1296,6 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,  {  	struct sock *sk = sock->sk;  	struct tipc_sock *tsk = tipc_sk(sk); -	struct tipc_port *port = &tsk->port;  	struct sk_buff *buf;  	struct tipc_msg *msg;  	long timeo; @@ -1188,7 +1330,7 @@ restart:  	/* Discard an empty non-errored message & try again */  	if ((!sz) && (!err)) { -		advance_rx_queue(sk); +		tsk_advance_rx_queue(sk);  		goto restart;  	} @@ -1196,7 +1338,7 @@ restart:  	set_orig_addr(m, msg);  	/* Capture ancillary data (optional) */ -	res = anc_data_recv(m, msg, port); +	res = tipc_sk_anc_data_recv(m, msg, tsk);  	if (res)  		goto exit; @@ -1206,8 +1348,7 @@ restart:  			sz = buf_len;  			m->msg_flags |= MSG_TRUNC;  		} -		res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg), -					      m->msg_iov, sz); +		res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz);  		if (res)  			goto exit;  		res = sz; @@ -1223,10 +1364,10 @@ restart:  	if (likely(!(flags & MSG_PEEK))) {  		if ((sock->state != SS_READY) &&  		    (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { -			tipc_acknowledge(port->ref, tsk->rcv_unacked); +			tipc_sk_send_ack(tsk, tsk->rcv_unacked);  			tsk->rcv_unacked = 0;  		} -		advance_rx_queue(sk); +		tsk_advance_rx_queue(sk);  	}  exit:  	release_sock(sk); @@ -1250,7 +1391,6 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,  {  	struct sock *sk = sock->sk;  	struct tipc_sock *tsk = tipc_sk(sk); -	struct tipc_port *port = &tsk->port;  	struct sk_buff *buf;  	struct tipc_msg *msg;  	long timeo; @@ -1288,14 +1428,14 @@ restart:  	/* Discard an empty non-errored message & try again */  	if ((!sz) && (!err)) { -		advance_rx_queue(sk); +		tsk_advance_rx_queue(sk);  		goto restart;  	}  	/* Optionally capture sender's address & ancillary data of first msg */  	if (sz_copied == 0) {  		set_orig_addr(m, msg); -		res = anc_data_recv(m, msg, port); +		res = tipc_sk_anc_data_recv(m, msg, tsk);  		if (res)  			goto exit;  	} @@ -1308,8 +1448,8 @@ restart:  		needed = (buf_len - sz_copied);  		sz_to_copy = (sz <= needed) ? sz : needed; -		res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset, -					      m->msg_iov, sz_to_copy); +		res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset, +					    m, sz_to_copy);  		if (res)  			goto exit; @@ -1334,10 +1474,10 @@ restart:  	/* Consume received message (optional) */  	if (likely(!(flags & MSG_PEEK))) {  		if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { -			tipc_acknowledge(port->ref, tsk->rcv_unacked); +			tipc_sk_send_ack(tsk, tsk->rcv_unacked);  			tsk->rcv_unacked = 0;  		} -		advance_rx_queue(sk); +		tsk_advance_rx_queue(sk);  	}  	/* Loop around if more data is required */ @@ -1391,17 +1531,14 @@ static void tipc_data_ready(struct sock *sk)   * @tsk: TIPC socket   * @msg: message   * - * Returns 0 (TIPC_OK) if everyting ok, -TIPC_ERR_NO_PORT otherwise + * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise   */  static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)  {  	struct sock *sk = &tsk->sk; -	struct tipc_port *port = &tsk->port;  	struct socket *sock = sk->sk_socket;  	struct tipc_msg *msg = buf_msg(*buf); -  	int retval = -TIPC_ERR_NO_PORT; -	int res;  	if (msg_mcast(msg))  		return retval; @@ -1409,16 +1546,23 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)  	switch ((int)sock->state) {  	case SS_CONNECTED:  		/* Accept only connection-based messages sent by peer */ -		if (msg_connected(msg) && tipc_port_peer_msg(port, msg)) { +		if (tsk_peer_msg(tsk, msg)) {  			if (unlikely(msg_errcode(msg))) {  				sock->state = SS_DISCONNECTING; -				__tipc_port_disconnect(port); +				tsk->connected = 0; +				/* let timer expire on it's own */ +				tipc_node_remove_conn(tsk_peer_node(tsk), +						      tsk->ref);  			}  			retval = TIPC_OK;  		}  		break;  	case SS_CONNECTING:  		/* Accept only ACK or NACK message */ + +		if (unlikely(!msg_connected(msg))) +			break; +  		if (unlikely(msg_errcode(msg))) {  			sock->state = SS_DISCONNECTING;  			sk->sk_err = ECONNREFUSED; @@ -1426,17 +1570,17 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)  			break;  		} -		if (unlikely(!msg_connected(msg))) -			break; - -		res = auto_connect(tsk, msg); -		if (res) { +		if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) {  			sock->state = SS_DISCONNECTING; -			sk->sk_err = -res; +			sk->sk_err = EINVAL;  			retval = TIPC_OK;  			break;  		} +		tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg)); +		msg_set_importance(&tsk->phdr, msg_importance(msg)); +		sock->state = SS_CONNECTED; +  		/* If an incoming message is an 'ACK-', it should be  		 * discarded here because it doesn't contain useful  		 * data. In addition, we should try to wake up @@ -1518,6 +1662,13 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf)  	if (unlikely(msg_user(msg) == CONN_MANAGER))  		return tipc_sk_proto_rcv(tsk, &onode, buf); +	if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { +		kfree_skb(buf); +		tsk->link_cong = 0; +		sk->sk_write_space(sk); +		return TIPC_OK; +	} +  	/* Reject message if it is wrong sort of message for socket */  	if (msg_type(msg) > TIPC_DIRECT_MSG)  		return -TIPC_ERR_NO_PORT; @@ -1547,20 +1698,20 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf)  /**   * tipc_backlog_rcv - handle incoming message from backlog queue   * @sk: socket - * @buf: message + * @skb: message   *   * Caller must hold socket lock, but not port lock.   *   * Returns 0   */ -static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)  {  	int rc;  	u32 onode;  	struct tipc_sock *tsk = tipc_sk(sk); -	uint truesize = buf->truesize; +	uint truesize = skb->truesize; -	rc = filter_rcv(sk, buf); +	rc = filter_rcv(sk, skb);  	if (likely(!rc)) {  		if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) @@ -1568,62 +1719,58 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)  		return 0;  	} -	if ((rc < 0) && !tipc_msg_reverse(buf, &onode, -rc)) +	if ((rc < 0) && !tipc_msg_reverse(skb, &onode, -rc))  		return 0; -	tipc_link_xmit(buf, onode, 0); +	tipc_link_xmit_skb(skb, onode, 0);  	return 0;  }  /**   * tipc_sk_rcv - handle incoming message - * @buf: buffer containing arriving message + * @skb: buffer containing arriving message   * Consumes buffer   * Returns 0 if success, or errno: -EHOSTUNREACH   */ -int tipc_sk_rcv(struct sk_buff *buf) +int tipc_sk_rcv(struct sk_buff *skb)  {  	struct tipc_sock *tsk; -	struct tipc_port *port;  	struct sock *sk; -	u32 dport = msg_destport(buf_msg(buf)); +	u32 dport = msg_destport(buf_msg(skb));  	int rc = TIPC_OK;  	uint limit;  	u32 dnode;  	/* Validate destination and message */ -	port = tipc_port_lock(dport); -	if (unlikely(!port)) { -		rc = tipc_msg_eval(buf, &dnode); +	tsk = tipc_sk_get(dport); +	if (unlikely(!tsk)) { +		rc = tipc_msg_eval(skb, &dnode);  		goto exit;  	} - -	tsk = tipc_port_to_sock(port);  	sk = &tsk->sk;  	/* Queue message */ -	bh_lock_sock(sk); +	spin_lock_bh(&sk->sk_lock.slock);  	if (!sock_owned_by_user(sk)) { -		rc = filter_rcv(sk, buf); +		rc = filter_rcv(sk, skb);  	} else {  		if (sk->sk_backlog.len == 0)  			atomic_set(&tsk->dupl_rcvcnt, 0); -		limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt); -		if (sk_add_backlog(sk, buf, limit)) +		limit = rcvbuf_limit(sk, skb) + atomic_read(&tsk->dupl_rcvcnt); +		if (sk_add_backlog(sk, skb, limit))  			rc = -TIPC_ERR_OVERLOAD;  	} -	bh_unlock_sock(sk); -	tipc_port_unlock(port); - +	spin_unlock_bh(&sk->sk_lock.slock); +	tipc_sk_put(tsk);  	if (likely(!rc))  		return 0;  exit: -	if ((rc < 0) && !tipc_msg_reverse(buf, &dnode, -rc)) +	if ((rc < 0) && !tipc_msg_reverse(skb, &dnode, -rc))  		return -EHOSTUNREACH; -	tipc_link_xmit(buf, dnode, 0); +	tipc_link_xmit_skb(skb, dnode, 0);  	return (rc < 0) ? -EHOSTUNREACH : 0;  } @@ -1803,10 +1950,8 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)  {  	struct sock *new_sk, *sk = sock->sk;  	struct sk_buff *buf; -	struct tipc_port *new_port; +	struct tipc_sock *new_tsock;  	struct tipc_msg *msg; -	struct tipc_portid peer; -	u32 new_ref;  	long timeo;  	int res; @@ -1828,8 +1973,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)  		goto exit;  	new_sk = new_sock->sk; -	new_port = &tipc_sk(new_sk)->port; -	new_ref = new_port->ref; +	new_tsock = tipc_sk(new_sk);  	msg = buf_msg(buf);  	/* we lock on new_sk; but lockdep sees the lock on sk */ @@ -1839,18 +1983,16 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)  	 * Reject any stray messages received by new socket  	 * before the socket lock was taken (very, very unlikely)  	 */ -	reject_rx_queue(new_sk); +	tsk_rej_rx_queue(new_sk);  	/* Connect new socket to it's peer */ -	peer.ref = msg_origport(msg); -	peer.node = msg_orignode(msg); -	tipc_port_connect(new_ref, &peer); +	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));  	new_sock->state = SS_CONNECTED; -	tipc_port_set_importance(new_port, msg_importance(msg)); +	tsk_set_importance(new_tsock, msg_importance(msg));  	if (msg_named(msg)) { -		new_port->conn_type = msg_nametype(msg); -		new_port->conn_instance = msg_nameinst(msg); +		new_tsock->conn_type = msg_nametype(msg); +		new_tsock->conn_instance = msg_nameinst(msg);  	}  	/* @@ -1860,7 +2002,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)  	if (!msg_data_sz(msg)) {  		struct msghdr m = {NULL,}; -		advance_rx_queue(sk); +		tsk_advance_rx_queue(sk);  		tipc_send_packet(NULL, new_sock, &m, 0);  	} else {  		__skb_dequeue(&sk->sk_receive_queue); @@ -1886,9 +2028,8 @@ static int tipc_shutdown(struct socket *sock, int how)  {  	struct sock *sk = sock->sk;  	struct tipc_sock *tsk = tipc_sk(sk); -	struct tipc_port *port = &tsk->port; -	struct sk_buff *buf; -	u32 peer; +	struct sk_buff *skb; +	u32 dnode;  	int res;  	if (how != SHUT_RDWR) @@ -1902,21 +2043,27 @@ static int tipc_shutdown(struct socket *sock, int how)  restart:  		/* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ -		buf = __skb_dequeue(&sk->sk_receive_queue); -		if (buf) { -			if (TIPC_SKB_CB(buf)->handle != NULL) { -				kfree_skb(buf); +		skb = __skb_dequeue(&sk->sk_receive_queue); +		if (skb) { +			if (TIPC_SKB_CB(skb)->handle != NULL) { +				kfree_skb(skb);  				goto restart;  			} -			tipc_port_disconnect(port->ref); -			if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN)) -				tipc_link_xmit(buf, peer, 0); +			if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) +				tipc_link_xmit_skb(skb, dnode, tsk->ref); +			tipc_node_remove_conn(dnode, tsk->ref);  		} else { -			tipc_port_shutdown(port->ref); +			dnode = tsk_peer_node(tsk); +			skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, +					      TIPC_CONN_MSG, SHORT_H_SIZE, +					      0, dnode, tipc_own_addr, +					      tsk_peer_port(tsk), +					      tsk->ref, TIPC_CONN_SHUTDOWN); +			tipc_link_xmit_skb(skb, dnode, tsk->ref);  		} - +		tsk->connected = 0;  		sock->state = SS_DISCONNECTING; - +		tipc_node_remove_conn(dnode, tsk->ref);  		/* fall through */  	case SS_DISCONNECTING: @@ -1937,6 +2084,432 @@ restart:  	return res;  } +static void tipc_sk_timeout(unsigned long ref) +{ +	struct tipc_sock *tsk; +	struct sock *sk; +	struct sk_buff *skb = NULL; +	u32 peer_port, peer_node; + +	tsk = tipc_sk_get(ref); +	if (!tsk) +		return; + +	sk = &tsk->sk; +	bh_lock_sock(sk); +	if (!tsk->connected) { +		bh_unlock_sock(sk); +		goto exit; +	} +	peer_port = tsk_peer_port(tsk); +	peer_node = tsk_peer_node(tsk); + +	if (tsk->probing_state == TIPC_CONN_PROBING) { +		/* Previous probe not answered -> self abort */ +		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, +				      SHORT_H_SIZE, 0, tipc_own_addr, +				      peer_node, ref, peer_port, +				      TIPC_ERR_NO_PORT); +	} else { +		skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, +				      0, peer_node, tipc_own_addr, +				      peer_port, ref, TIPC_OK); +		tsk->probing_state = TIPC_CONN_PROBING; +		k_start_timer(&tsk->timer, tsk->probing_interval); +	} +	bh_unlock_sock(sk); +	if (skb) +		tipc_link_xmit_skb(skb, peer_node, ref); +exit: +	tipc_sk_put(tsk); +} + +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, +			   struct tipc_name_seq const *seq) +{ +	struct publication *publ; +	u32 key; + +	if (tsk->connected) +		return -EINVAL; +	key = tsk->ref + tsk->pub_count + 1; +	if (key == tsk->ref) +		return -EADDRINUSE; + +	publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, +				    scope, tsk->ref, key); +	if (unlikely(!publ)) +		return -EINVAL; + +	list_add(&publ->pport_list, &tsk->publications); +	tsk->pub_count++; +	tsk->published = 1; +	return 0; +} + +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, +			    struct tipc_name_seq const *seq) +{ +	struct publication *publ; +	struct publication *safe; +	int rc = -EINVAL; + +	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) { +		if (seq) { +			if (publ->scope != scope) +				continue; +			if (publ->type != seq->type) +				continue; +			if (publ->lower != seq->lower) +				continue; +			if (publ->upper != seq->upper) +				break; +			tipc_nametbl_withdraw(publ->type, publ->lower, +					      publ->ref, publ->key); +			rc = 0; +			break; +		} +		tipc_nametbl_withdraw(publ->type, publ->lower, +				      publ->ref, publ->key); +		rc = 0; +	} +	if (list_empty(&tsk->publications)) +		tsk->published = 0; +	return rc; +} + +static int tipc_sk_show(struct tipc_sock *tsk, char *buf, +			int len, int full_id) +{ +	struct publication *publ; +	int ret; + +	if (full_id) +		ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", +				    tipc_zone(tipc_own_addr), +				    tipc_cluster(tipc_own_addr), +				    tipc_node(tipc_own_addr), tsk->ref); +	else +		ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref); + +	if (tsk->connected) { +		u32 dport = tsk_peer_port(tsk); +		u32 destnode = tsk_peer_node(tsk); + +		ret += tipc_snprintf(buf + ret, len - ret, +				     " connected to <%u.%u.%u:%u>", +				     tipc_zone(destnode), +				     tipc_cluster(destnode), +				     tipc_node(destnode), dport); +		if (tsk->conn_type != 0) +			ret += tipc_snprintf(buf + ret, len - ret, +					     " via {%u,%u}", tsk->conn_type, +					     tsk->conn_instance); +	} else if (tsk->published) { +		ret += tipc_snprintf(buf + ret, len - ret, " bound to"); +		list_for_each_entry(publ, &tsk->publications, pport_list) { +			if (publ->lower == publ->upper) +				ret += tipc_snprintf(buf + ret, len - ret, +						     " {%u,%u}", publ->type, +						     publ->lower); +			else +				ret += tipc_snprintf(buf + ret, len - ret, +						     " {%u,%u,%u}", publ->type, +						     publ->lower, publ->upper); +		} +	} +	ret += tipc_snprintf(buf + ret, len - ret, "\n"); +	return ret; +} + +struct sk_buff *tipc_sk_socks_show(void) +{ +	struct sk_buff *buf; +	struct tlv_desc *rep_tlv; +	char *pb; +	int pb_len; +	struct tipc_sock *tsk; +	int str_len = 0; +	u32 ref = 0; + +	buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); +	if (!buf) +		return NULL; +	rep_tlv = (struct tlv_desc *)buf->data; +	pb = TLV_DATA(rep_tlv); +	pb_len = ULTRA_STRING_MAX_LEN; + +	tsk = tipc_sk_get_next(&ref); +	for (; tsk; tsk = tipc_sk_get_next(&ref)) { +		lock_sock(&tsk->sk); +		str_len += tipc_sk_show(tsk, pb + str_len, +					pb_len - str_len, 0); +		release_sock(&tsk->sk); +		tipc_sk_put(tsk); +	} +	str_len += 1;	/* for "\0" */ +	skb_put(buf, TLV_SPACE(str_len)); +	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); + +	return buf; +} + +/* tipc_sk_reinit: set non-zero address in all existing sockets + *                 when we go from standalone to network mode. + */ +void tipc_sk_reinit(void) +{ +	struct tipc_msg *msg; +	u32 ref = 0; +	struct tipc_sock *tsk = tipc_sk_get_next(&ref); + +	for (; tsk; tsk = tipc_sk_get_next(&ref)) { +		lock_sock(&tsk->sk); +		msg = &tsk->phdr; +		msg_set_prevnode(msg, tipc_own_addr); +		msg_set_orignode(msg, tipc_own_addr); +		release_sock(&tsk->sk); +		tipc_sk_put(tsk); +	} +} + +/** + * struct reference - TIPC socket reference entry + * @tsk: pointer to socket associated with reference entry + * @ref: reference value for socket (combines instance & array index info) + */ +struct reference { +	struct tipc_sock *tsk; +	u32 ref; +}; + +/** + * struct tipc_ref_table - table of TIPC socket reference entries + * @entries: pointer to array of reference entries + * @capacity: array index of first unusable entry + * @init_point: array index of first uninitialized entry + * @first_free: array index of first unused socket reference entry + * @last_free: array index of last unused socket reference entry + * @index_mask: bitmask for array index portion of reference values + * @start_mask: initial value for instance value portion of reference values + */ +struct ref_table { +	struct reference *entries; +	u32 capacity; +	u32 init_point; +	u32 first_free; +	u32 last_free; +	u32 index_mask; +	u32 start_mask; +}; + +/* Socket reference table consists of 2**N entries. + * + * State	Socket ptr	Reference + * -----        ----------      --------- + * In use        non-NULL       XXXX|own index + *				(XXXX changes each time entry is acquired) + * Free            NULL         YYYY|next free index + *				(YYYY is one more than last used XXXX) + * Uninitialized   NULL         0 + * + * Entry 0 is not used; this allows index 0 to denote the end of the free list. + * + * Note that a reference value of 0 does not necessarily indicate that an + * entry is uninitialized, since the last entry in the free list could also + * have a reference value of 0 (although this is unlikely). + */ + +static struct ref_table tipc_ref_table; + +static DEFINE_RWLOCK(ref_table_lock); + +/** + * tipc_ref_table_init - create reference table for sockets + */ +int tipc_sk_ref_table_init(u32 req_sz, u32 start) +{ +	struct reference *table; +	u32 actual_sz; + +	/* account for unused entry, then round up size to a power of 2 */ + +	req_sz++; +	for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) { +		/* do nothing */ +	}; + +	/* allocate table & mark all entries as uninitialized */ +	table = vzalloc(actual_sz * sizeof(struct reference)); +	if (table == NULL) +		return -ENOMEM; + +	tipc_ref_table.entries = table; +	tipc_ref_table.capacity = req_sz; +	tipc_ref_table.init_point = 1; +	tipc_ref_table.first_free = 0; +	tipc_ref_table.last_free = 0; +	tipc_ref_table.index_mask = actual_sz - 1; +	tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; + +	return 0; +} + +/** + * tipc_ref_table_stop - destroy reference table for sockets + */ +void tipc_sk_ref_table_stop(void) +{ +	if (!tipc_ref_table.entries) +		return; +	vfree(tipc_ref_table.entries); +	tipc_ref_table.entries = NULL; +} + +/* tipc_ref_acquire - create reference to a socket + * + * Register an socket pointer in the reference table. + * Returns a unique reference value that is used from then on to retrieve the + * socket pointer, or to determine if the socket has been deregistered. + */ +u32 tipc_sk_ref_acquire(struct tipc_sock *tsk) +{ +	u32 index; +	u32 index_mask; +	u32 next_plus_upper; +	u32 ref = 0; +	struct reference *entry; + +	if (unlikely(!tsk)) { +		pr_err("Attempt to acquire ref. to non-existent obj\n"); +		return 0; +	} +	if (unlikely(!tipc_ref_table.entries)) { +		pr_err("Ref. table not found in acquisition attempt\n"); +		return 0; +	} + +	/* Take a free entry, if available; otherwise initialize a new one */ +	write_lock_bh(&ref_table_lock); +	index = tipc_ref_table.first_free; +	entry = &tipc_ref_table.entries[index]; + +	if (likely(index)) { +		index = tipc_ref_table.first_free; +		entry = &tipc_ref_table.entries[index]; +		index_mask = tipc_ref_table.index_mask; +		next_plus_upper = entry->ref; +		tipc_ref_table.first_free = next_plus_upper & index_mask; +		ref = (next_plus_upper & ~index_mask) + index; +		entry->tsk = tsk; +	} else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { +		index = tipc_ref_table.init_point++; +		entry = &tipc_ref_table.entries[index]; +		ref = tipc_ref_table.start_mask + index; +	} + +	if (ref) { +		entry->ref = ref; +		entry->tsk = tsk; +	} +	write_unlock_bh(&ref_table_lock); +	return ref; +} + +/* tipc_sk_ref_discard - invalidate reference to an socket + * + * Disallow future references to an socket and free up the entry for re-use. + */ +void tipc_sk_ref_discard(u32 ref) +{ +	struct reference *entry; +	u32 index; +	u32 index_mask; + +	if (unlikely(!tipc_ref_table.entries)) { +		pr_err("Ref. table not found during discard attempt\n"); +		return; +	} + +	index_mask = tipc_ref_table.index_mask; +	index = ref & index_mask; +	entry = &tipc_ref_table.entries[index]; + +	write_lock_bh(&ref_table_lock); + +	if (unlikely(!entry->tsk)) { +		pr_err("Attempt to discard ref. to non-existent socket\n"); +		goto exit; +	} +	if (unlikely(entry->ref != ref)) { +		pr_err("Attempt to discard non-existent reference\n"); +		goto exit; +	} + +	/* Mark entry as unused; increment instance part of entry's +	 *   reference to invalidate any subsequent references +	 */ + +	entry->tsk = NULL; +	entry->ref = (ref & ~index_mask) + (index_mask + 1); + +	/* Append entry to free entry list */ +	if (unlikely(tipc_ref_table.first_free == 0)) +		tipc_ref_table.first_free = index; +	else +		tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; +	tipc_ref_table.last_free = index; +exit: +	write_unlock_bh(&ref_table_lock); +} + +/* tipc_sk_get - find referenced socket and return pointer to it + */ +struct tipc_sock *tipc_sk_get(u32 ref) +{ +	struct reference *entry; +	struct tipc_sock *tsk; + +	if (unlikely(!tipc_ref_table.entries)) +		return NULL; +	read_lock_bh(&ref_table_lock); +	entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask]; +	tsk = entry->tsk; +	if (likely(tsk && (entry->ref == ref))) +		sock_hold(&tsk->sk); +	else +		tsk = NULL; +	read_unlock_bh(&ref_table_lock); +	return tsk; +} + +/* tipc_sk_get_next - lock & return next socket after referenced one +*/ +struct tipc_sock *tipc_sk_get_next(u32 *ref) +{ +	struct reference *entry; +	struct tipc_sock *tsk = NULL; +	uint index = *ref & tipc_ref_table.index_mask; + +	read_lock_bh(&ref_table_lock); +	while (++index < tipc_ref_table.capacity) { +		entry = &tipc_ref_table.entries[index]; +		if (!entry->tsk) +			continue; +		tsk = entry->tsk; +		sock_hold(&tsk->sk); +		*ref = entry->ref; +		break; +	} +	read_unlock_bh(&ref_table_lock); +	return tsk; +} + +static void tipc_sk_put(struct tipc_sock *tsk) +{ +	sock_put(&tsk->sk); +} +  /**   * tipc_setsockopt - set socket option   * @sock: socket structure @@ -1955,7 +2528,6 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,  {  	struct sock *sk = sock->sk;  	struct tipc_sock *tsk = tipc_sk(sk); -	struct tipc_port *port = &tsk->port;  	u32 value;  	int res; @@ -1973,16 +2545,16 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,  	switch (opt) {  	case TIPC_IMPORTANCE: -		res = tipc_port_set_importance(port, value); +		res = tsk_set_importance(tsk, value);  		break;  	case TIPC_SRC_DROPPABLE:  		if (sock->type != SOCK_STREAM) -			tipc_port_set_unreliable(port, value); +			tsk_set_unreliable(tsk, value);  		else  			res = -ENOPROTOOPT;  		break;  	case TIPC_DEST_DROPPABLE: -		tipc_port_set_unreturnable(port, value); +		tsk_set_unreturnable(tsk, value);  		break;  	case TIPC_CONN_TIMEOUT:  		tipc_sk(sk)->conn_timeout = value; @@ -2015,7 +2587,6 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,  {  	struct sock *sk = sock->sk;  	struct tipc_sock *tsk = tipc_sk(sk); -	struct tipc_port *port = &tsk->port;  	int len;  	u32 value;  	int res; @@ -2032,16 +2603,16 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,  	switch (opt) {  	case TIPC_IMPORTANCE: -		value = tipc_port_importance(port); +		value = tsk_importance(tsk);  		break;  	case TIPC_SRC_DROPPABLE: -		value = tipc_port_unreliable(port); +		value = tsk_unreliable(tsk);  		break;  	case TIPC_DEST_DROPPABLE: -		value = tipc_port_unreturnable(port); +		value = tsk_unreturnable(tsk);  		break;  	case TIPC_CONN_TIMEOUT: -		value = tipc_sk(sk)->conn_timeout; +		value = tsk->conn_timeout;  		/* no need to set "res", since already 0 at this point */  		break;  	case TIPC_NODE_RECVQ_DEPTH: @@ -2077,7 +2648,7 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)  	case SIOCGETLINKNAME:  		if (copy_from_user(&lnr, argp, sizeof(lnr)))  			return -EFAULT; -		if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer, +		if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer,  					    lnr.linkname, TIPC_MAX_LINK_NAME)) {  			if (copy_to_user(argp, &lnr, sizeof(lnr)))  				return -EFAULT; @@ -2206,3 +2777,233 @@ void tipc_socket_stop(void)  	sock_unregister(tipc_family_ops.family);  	proto_unregister(&tipc_proto);  } + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) +{ +	u32 peer_node; +	u32 peer_port; +	struct nlattr *nest; + +	peer_node = tsk_peer_node(tsk); +	peer_port = tsk_peer_port(tsk); + +	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + +	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) +		goto msg_full; +	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) +		goto msg_full; + +	if (tsk->conn_type != 0) { +		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) +			goto msg_full; +		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type)) +			goto msg_full; +		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance)) +			goto msg_full; +	} +	nla_nest_end(skb, nest); + +	return 0; + +msg_full: +	nla_nest_cancel(skb, nest); + +	return -EMSGSIZE; +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, +			    struct tipc_sock *tsk) +{ +	int err; +	void *hdr; +	struct nlattr *attrs; + +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, +			  &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); +	if (!hdr) +		goto msg_cancel; + +	attrs = nla_nest_start(skb, TIPC_NLA_SOCK); +	if (!attrs) +		goto genlmsg_cancel; +	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->ref)) +		goto attr_msg_cancel; +	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) +		goto attr_msg_cancel; + +	if (tsk->connected) { +		err = __tipc_nl_add_sk_con(skb, tsk); +		if (err) +			goto attr_msg_cancel; +	} else if (!list_empty(&tsk->publications)) { +		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) +			goto attr_msg_cancel; +	} +	nla_nest_end(skb, attrs); +	genlmsg_end(skb, hdr); + +	return 0; + +attr_msg_cancel: +	nla_nest_cancel(skb, attrs); +genlmsg_cancel: +	genlmsg_cancel(skb, hdr); +msg_cancel: +	return -EMSGSIZE; +} + +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ +	int err; +	struct tipc_sock *tsk; +	u32 prev_ref = cb->args[0]; +	u32 ref = prev_ref; + +	tsk = tipc_sk_get_next(&ref); +	for (; tsk; tsk = tipc_sk_get_next(&ref)) { +		lock_sock(&tsk->sk); +		err = __tipc_nl_add_sk(skb, cb, tsk); +		release_sock(&tsk->sk); +		tipc_sk_put(tsk); +		if (err) +			break; + +		prev_ref = ref; +	} + +	cb->args[0] = prev_ref; + +	return skb->len; +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk_publ(struct sk_buff *skb, +				 struct netlink_callback *cb, +				 struct publication *publ) +{ +	void *hdr; +	struct nlattr *attrs; + +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, +			  &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); +	if (!hdr) +		goto msg_cancel; + +	attrs = nla_nest_start(skb, TIPC_NLA_PUBL); +	if (!attrs) +		goto genlmsg_cancel; + +	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) +		goto attr_msg_cancel; +	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) +		goto attr_msg_cancel; +	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) +		goto attr_msg_cancel; +	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) +		goto attr_msg_cancel; + +	nla_nest_end(skb, attrs); +	genlmsg_end(skb, hdr); + +	return 0; + +attr_msg_cancel: +	nla_nest_cancel(skb, attrs); +genlmsg_cancel: +	genlmsg_cancel(skb, hdr); +msg_cancel: +	return -EMSGSIZE; +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_list_sk_publ(struct sk_buff *skb, +				  struct netlink_callback *cb, +				  struct tipc_sock *tsk, u32 *last_publ) +{ +	int err; +	struct publication *p; + +	if (*last_publ) { +		list_for_each_entry(p, &tsk->publications, pport_list) { +			if (p->key == *last_publ) +				break; +		} +		if (p->key != *last_publ) { +			/* We never set seq or call nl_dump_check_consistent() +			 * this means that setting prev_seq here will cause the +			 * consistence check to fail in the netlink callback +			 * handler. Resulting in the last NLMSG_DONE message +			 * having the NLM_F_DUMP_INTR flag set. +			 */ +			cb->prev_seq = 1; +			*last_publ = 0; +			return -EPIPE; +		} +	} else { +		p = list_first_entry(&tsk->publications, struct publication, +				     pport_list); +	} + +	list_for_each_entry_from(p, &tsk->publications, pport_list) { +		err = __tipc_nl_add_sk_publ(skb, cb, p); +		if (err) { +			*last_publ = p->key; +			return err; +		} +	} +	*last_publ = 0; + +	return 0; +} + +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ +	int err; +	u32 tsk_ref = cb->args[0]; +	u32 last_publ = cb->args[1]; +	u32 done = cb->args[2]; +	struct tipc_sock *tsk; + +	if (!tsk_ref) { +		struct nlattr **attrs; +		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + +		err = tipc_nlmsg_parse(cb->nlh, &attrs); +		if (err) +			return err; + +		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, +				       attrs[TIPC_NLA_SOCK], +				       tipc_nl_sock_policy); +		if (err) +			return err; + +		if (!sock[TIPC_NLA_SOCK_REF]) +			return -EINVAL; + +		tsk_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); +	} + +	if (done) +		return 0; + +	tsk = tipc_sk_get(tsk_ref); +	if (!tsk) +		return -EINVAL; + +	lock_sock(&tsk->sk); +	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ); +	if (!err) +		done = 1; +	release_sock(&tsk->sk); +	tipc_sk_put(tsk); + +	cb->args[0] = tsk_ref; +	cb->args[1] = last_publ; +	cb->args[2] = done; + +	return skb->len; +}  | 
