/* * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. *
*/ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/in.h> #include <linux/module.h> #include <net/tcp.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/addrconf.h>
#include"rds.h" #include"tcp.h"
/* only for info exporting */ static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); static LIST_HEAD(rds_tcp_tc_list);
/* rds_tcp_tc_count counts only IPv4 connections. * rds6_tcp_tc_count counts both IPv4 and IPv6 connections.
*/ staticunsignedint rds_tcp_tc_count; #if IS_ENABLED(CONFIG_IPV6) staticunsignedint rds6_tcp_tc_count; #endif
/* Track rds_tcp_connection structs so they can be cleaned up */ static DEFINE_SPINLOCK(rds_tcp_conn_lock); static LIST_HEAD(rds_tcp_conn_list); static atomic_t rds_tcp_unloading = ATOMIC_INIT(0);
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{ /* seq# of the last byte of data in tcp send buffer */ return tcp_sk(tc->t_sock->sk)->write_seq;
}
/* done under the callback_lock to serialize with write_space */
spin_lock(&rds_tcp_tc_list_lock);
list_del_init(&tc->t_list_item); #if IS_ENABLED(CONFIG_IPV6)
rds6_tcp_tc_count--; #endif if (!tc->t_cpath->cp_conn->c_isv6)
rds_tcp_tc_count--;
spin_unlock(&rds_tcp_tc_list_lock);
/* * rds_tcp_reset_callbacks() switches the to the new sock and * returns the existing tc->t_sock. * * The only functions that set tc->t_sock are rds_tcp_set_callbacks * and rds_tcp_reset_callbacks. Send and receive trust that * it is set. The absence of RDS_CONN_UP bit protects those paths * from being called while it isn't set.
*/ void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp)
{ struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *osock = tc->t_sock;
if (!osock) goto newsock;
/* Need to resolve a duelling SYN between peers. * We have an outstanding SYN to this peer, which may * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting * cp_transport_data. We quiesce these threads by setting * cp_state to something other than RDS_CONN_UP, and then * waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). * * However an incoming syn-ack at this point would end up * marking the conn as RDS_CONN_UP, and would again permit * rds_send_xmi() threads through, so ideally we would * synchronize on RDS_CONN_UP after lock_sock(), but cannot * do that: waiting on !RDS_IN_XMIT after lock_sock() may * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT * would not get set. As a result, we set c_state to * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change * cannot mark rds_conn_path_up() in the window before lock_sock()
*/
atomic_set(&cp->cp_state, RDS_CONN_RESETTING);
wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); /* reset receive side state for rds_tcp_data_recv() for osock */
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
lock_sock(osock->sk); if (tc->t_tinc) {
rds_inc_put(&tc->t_tinc->ti_inc);
tc->t_tinc = NULL;
}
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
tc->t_tinc_data_rem = 0;
rds_tcp_restore_callbacks(osock, tc);
release_sock(osock->sk);
sock_release(osock);
newsock:
rds_send_path_reset(cp);
lock_sock(sock->sk);
rds_tcp_set_callbacks(sock, cp);
release_sock(sock->sk);
}
/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments * above rds_tcp_reset_callbacks for notes about synchronization * with data path
*/ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
{ struct rds_tcp_connection *tc = cp->cp_transport_data;
rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
write_lock_bh(&sock->sk->sk_callback_lock);
/* done under the callback_lock to serialize with write_space */
spin_lock(&rds_tcp_tc_list_lock);
list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); #if IS_ENABLED(CONFIG_IPV6)
rds6_tcp_tc_count++; #endif if (!tc->t_cpath->cp_conn->c_isv6)
rds_tcp_tc_count++;
spin_unlock(&rds_tcp_tc_list_lock);
/* accepted sockets need our listen data ready undone */ if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready)
sock->sk->sk_data_ready = sock->sk->sk_user_data;
int rds_tcp_laddr_check(struct net *net, conststruct in6_addr *addr,
__u32 scope_id)
{ struct net_device *dev = NULL; #if IS_ENABLED(CONFIG_IPV6) int ret; #endif
if (ipv6_addr_v4mapped(addr)) { if (inet_addr_type(net, addr->s6_addr32[3]) == RTN_LOCAL) return 0; return -EADDRNOTAVAIL;
}
/* If the scope_id is specified, check only those addresses * hosted on the specified interface.
*/ if (scope_id != 0) {
rcu_read_lock();
dev = dev_get_by_index_rcu(net, scope_id); /* scope_id is not valid... */ if (!dev) {
rcu_read_unlock(); return -EADDRNOTAVAIL;
}
rcu_read_unlock();
} #if IS_ENABLED(CONFIG_IPV6)
ret = ipv6_chk_addr(net, addr, dev, 0); if (ret) return 0; #endif return -EADDRNOTAVAIL;
}
/* per-network namespace private data for this module */ struct rds_tcp_net { struct socket *rds_tcp_listen_sock; struct work_struct rds_tcp_accept_w; struct ctl_table_header *rds_tcp_sysctl; struct ctl_table *ctl_table; int sndbuf_size; int rcvbuf_size;
};
/* All module specific customizations to the RDS-TCP socket should be done in * rds_tcp_tune() and applied after socket creation.
*/ bool rds_tcp_tune(struct socket *sock)
{ struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct rds_tcp_net *rtn;
tcp_sock_set_nodelay(sock->sk);
lock_sock(sk); /* TCP timer functions might access net namespace even after * a process which created this net namespace terminated.
*/ if (!sk->sk_net_refcnt) { if (!maybe_get_net(net)) {
release_sock(sk); returnfalse;
}
sk_net_refcnt_upgrade(sk);
put_net(net);
}
rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
} if (rtn->rcvbuf_size > 0) {
sk->sk_rcvbuf = rtn->rcvbuf_size;
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
release_sock(sk); returntrue;
}
/* when sysctl is used to modify some kernel socket parameters,this * function resets the RDS connections in that netns so that we can * restart with new parameters. The assumption is that such reset * events are few and far-between.
*/ staticvoid rds_tcp_sysctl_reset(struct net *net)
{ struct rds_tcp_connection *tc, *_tc;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.