// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This file is part of the SCTP kernel implementation * * Initialization/cleanup for SCTP protocol support. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@us.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> * Daisy Chang <daisyc@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com>
*/
/* NOTE(review): this span is the TAIL of an IPv4 address-list copy routine
 * (the signature and the matching rcu_read_lock() precede this chunk and
 * were lost in extraction).  Each address configured on the in_device is
 * wrapped in a zero-initialized sctp_sockaddr_entry and appended to
 * 'addrlist'.  GFP_ATOMIC is required because we are inside an RCU read
 * section; an allocation failure simply skips that address (best effort).
 */
in_dev_for_each_ifa_rcu(ifa, in_dev) { /* Add the address to the local list. */
addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) {
addr->a.v4.sin_family = AF_INET;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
addr->valid = 1;
INIT_LIST_HEAD(&addr->list);
list_add_tail(&addr->list, addrlist);
}
}
rcu_read_unlock();
}
/* Extract our IP addresses from the system and stash them in the * protocol structure.
 */ staticvoid sctp_get_local_addr_list(struct net *net)
{ struct net_device *dev; struct list_head *pos; struct sctp_af *af;
/* NOTE(review): body truncated by extraction -- the loop that walks every
 * net_device and calls each address family's copy_addrlist() hook is
 * missing, as is the closing brace.  "staticvoid" is a fused-keyword
 * extraction artifact (should be "static void"); left as-is because this
 * fragment cannot be safely reconstructed here.
 */
/* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, enum sctp_scope scope, gfp_t gfp, int copy_flags)
{ struct sctp_sockaddr_entry *addr; union sctp_addr laddr; int error = 0;
/* Walk the per-namespace local address list under RCU, skipping entries
 * that are invalid, out of the requested scope, or whose address family
 * is not allowed by 'copy_flags' / not supported by the peer.
 */
rcu_read_lock();
list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; if (!sctp_in_scope(net, &addr->a, scope)) continue;
/* Now that the address is in scope, check to see if * the address type is really supported by the local * sock as well as the remote peer.
 */ if (addr->a.sa.sa_family == AF_INET &&
(!(copy_flags & SCTP_ADDR4_ALLOWED) ||
!(copy_flags & SCTP_ADDR4_PEERSUPP))) continue; if (addr->a.sa.sa_family == AF_INET6 &&
(!(copy_flags & SCTP_ADDR6_ALLOWED) ||
!(copy_flags & SCTP_ADDR6_PEERSUPP))) continue;
laddr = addr->a; /* also works for setting ipv6 address port */
laddr.v4.sin_port = htons(bp->port); if (sctp_bind_addr_state(bp, &laddr) != -1) continue;
/* NOTE(review): truncated by extraction -- the call that actually adds the
 * surviving address to 'bp', the rcu_read_unlock(), the 'return error;'
 * and the closing brace are all missing from this chunk.
 */
/* Copy over any ip options.
 *
 * Duplicates the parent socket's IP options (if any) into the freshly
 * accepted socket.  On allocation failure the new socket simply ends up
 * with no options; the failure is logged but not fatal.
 */
static void sctp_v4_copy_ip_options(struct sock *sk, struct sock *newsk)
{
	struct inet_sock *newinet, *inet = inet_sk(sk);
	struct ip_options_rcu *inet_opt, *newopt = NULL;

	newinet = inet_sk(newsk);

	/* inet->inet_opt is RCU-protected; hold the read lock while
	 * dereferencing and copying it.
	 */
	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt) {
		/* Copy the header plus the variable-length option bytes,
		 * charging the allocation to the new socket.
		 */
		newopt = sock_kmemdup(newsk, inet_opt, sizeof(*inet_opt) +
				      inet_opt->opt.optlen, GFP_ATOMIC);
		if (!newopt)
			pr_err("%s: Failed to copy ip options\n", __func__);
	}
	/* newsk is not yet visible to other CPUs, so a plain init is fine. */
	RCU_INIT_POINTER(newinet->inet_opt, newopt);
	rcu_read_unlock();
}
/* Account for the IP options.
 *
 * Returns the length in bytes of the socket's IP options, or 0 when
 * none are set.  The options pointer is RCU-protected, so the lookup
 * is done under rcu_read_lock().
 */
static int sctp_v4_ip_options_len(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options_rcu *inet_opt;
	int len = 0;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt)
		len = inet_opt->opt.optlen;
	rcu_read_unlock();

	return len;
}
/* Initialize a sctp_addr from in incoming skb. */ staticvoid sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, int is_saddr)
{ /* Always called on head skb, so this is safe */ struct sctphdr *sh = sctp_hdr(skb); struct sockaddr_in *sa = &addr->v4;
/* NOTE(review): body truncated by extraction -- the code that fills in
 * sin_family/sin_port/sin_addr from the IP and SCTP headers (choosing
 * source vs destination based on 'is_saddr') is missing, as is the
 * closing brace.  "staticvoid" is a fused-keyword extraction artifact.
 */
/* Initialize a sctp_addr from an address parameter. */ staticbool sctp_v4_from_addr_param(union sctp_addr *addr, union sctp_addr_param *param,
__be16 port, int iif)
{ if (ntohs(param->v4.param_hdr.length) < sizeof(struct sctp_ipv4addr_param)) returnfalse;
/* NOTE(review): truncated by extraction -- the assignments that populate
 * 'addr' from the validated parameter and the 'return true;' plus closing
 * brace are missing.  "staticbool"/"returnfalse" are fused-keyword
 * extraction artifacts.
 */
/* Initialize an address parameter from a sctp_addr and return the length * of the address parameter.
 */ staticint sctp_v4_to_addr_param(constunion sctp_addr *addr, union sctp_addr_param *param)
{ int length = sizeof(struct sctp_ipv4addr_param);
/* NOTE(review): truncated by extraction -- the code filling in the
 * parameter header and address bytes and the 'return length;' are missing.
 * "staticint"/"constunion" are fused-keyword extraction artifacts.
 */
/* Is this a wildcard address? */ staticint sctp_v4_is_any(constunion sctp_addr *addr)
{ return htonl(INADDR_ANY) == addr->v4.sin_addr.s_addr;
}
/* This function checks if the address is a valid address to be used for * SCTP binding. * * Output: * Return 0 - If the address is a non-unicast or an illegal address. * Return 1 - If the address is a unicast.
*/ staticint sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_sock *sp, conststruct sk_buff *skb)
{ /* IPv4 addresses not allowed */ if (sp && ipv6_only_sock(sctp_opt2sk(sp))) return 0;
/* Is this a non-unicast address or a unusable SCTP address? */ if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) return 0;
/* Is this a broadcast address? */ if (skb && skb_rtable(skb)->rt_flags & RTCF_BROADCAST) return 0;
return 1;
}
/* Should this be available for binding?
 *
 * Returns 1 when 'addr' may be bound by this socket: either it is the
 * wildcard, a local address (looked up in the socket's FIB table, which
 * may be an L3 master device table), or non-local binding is permitted
 * via IP_FREEBIND or the ip_nonlocal_bind sysctl.  An IPv6-only socket
 * can never bind a v4 address.
 */
static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
{
	struct sock *sk = &sp->inet.sk;
	struct net *net = sock_net(sk);
	int tb_id = RT_TABLE_LOCAL;
	int ret;

	/* Respect VRF: use the l3mdev table when bound to one. */
	tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ?: tb_id;
	ret = inet_addr_type_table(net, addr->v4.sin_addr.s_addr, tb_id);
	if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
	    ret != RTN_LOCAL &&
	    !inet_test_bit(FREEBIND, sk) &&
	    !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
		return 0;

	if (ipv6_only_sock(sctp_opt2sk(sp)))
		return 0;

	return 1;
}
/* Checking the loopback, private and other address scopes as defined in * RFC 1918. The IPv4 scoping is based on the draft for SCTP IPv4 * scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>. * * Level 0 - unusable SCTP addresses * Level 1 - loopback address * Level 2 - link-local addresses * Level 3 - private addresses. * Level 4 - global addresses * For INIT and INIT-ACK address list, let L be the level of * requested destination address, sender and receiver * SHOULD include all of its addresses with level greater * than or equal to L. * * IPv4 scoping can be controlled through sysctl option * net.sctp.addr_scope_policy
 */ staticenum sctp_scope sctp_v4_scope(union sctp_addr *addr)
{ enum sctp_scope retval;
/* NOTE(review): body truncated by extraction -- the classification chain
 * that maps the address to one of the scope levels described above is
 * missing, as is the 'return retval;' and closing brace.  "staticenum" is
 * a fused-keyword extraction artifact.
 */
/* Returns a valid dst cache entry for the given source and destination ip * addresses. If an association is passed, trys to get a dst entry with a * source address that matches an address in the bind address list.
 */ staticvoid sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct flowi *fl, struct sock *sk)
{ struct sctp_association *asoc = t->asoc; struct rtable *rt; struct flowi _fl; struct flowi4 *fl4 = &_fl.u.ip4; struct sctp_bind_addr *bp; struct sctp_sockaddr_entry *laddr; struct dst_entry *dst = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr;
dscp_t dscp;
/* NOTE(review): extraction dropped a chunk here -- upstream fills in the
 * flowi4 key (daddr, optional saddr, dscp, ports, oif) and performs the
 * initial route lookup that sets 'dst' before the "if (dst)" test below.
 * As shown, 'dst' is still NULL at that test, which cannot be the intended
 * logic.  "staticvoid" is a fused-keyword extraction artifact.
 */
/* If there is no association or if a source address is passed, no * more validation is required.
 */ if (!asoc || saddr) goto out;
bp = &asoc->base.bind_addr;
if (dst) { /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst.
 */
sctp_v4_dst_saddr(&dst_saddr, fl4, htons(bp->port));
rcu_read_lock();
list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid || (laddr->state == SCTP_ADDR_DEL) ||
(laddr->state != SCTP_ADDR_SRC &&
!asoc->src_out_of_asoc_ok)) continue; if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock;
}
rcu_read_unlock();
/* None of the bound addresses match the source address of the * dst. So release it.
 */
dst_release(dst);
dst = NULL;
}
/* Walk through the bind address list and try to get a dst that * matches a bind address as the source address.
 */
rcu_read_lock();
list_for_each_entry_rcu(laddr, &bp->address_list, list) { struct net_device *odev;
if (!laddr->valid) continue; if (laddr->state != SCTP_ADDR_SRC ||
AF_INET != laddr->a.sa.sa_family) continue;
/* NOTE(review): truncated again -- the per-laddr route lookup, the
 * out/out_unlock labels and the epilogue that stores the result into the
 * transport are missing from this chunk.
 */
/* For v4, the source address is cached in the route entry(dst). So no need * to cache it separately and hence this is an empty routine.
 */ staticvoid sctp_v4_get_saddr(struct sctp_sock *sk, struct sctp_transport *t, struct flowi *fl)
{ union sctp_addr *saddr = &t->saddr; struct rtable *rt = dst_rtable(t->dst);
/* NOTE(review): truncated by extraction -- the code that copies the
 * route's preferred source address into 'saddr' when 'rt' is valid is
 * missing, plus the closing brace.  "staticvoid" is a fused-keyword
 * extraction artifact.
 */
/* Was this packet marked by Explicit Congestion Notification? */ staticint sctp_v4_is_ce(conststruct sk_buff *skb)
{ return INET_ECN_is_ce(ip_hdr(skb)->tos);
}
/* Create and initialize a new sk for the socket returned by accept(). */ staticstruct sock *sctp_v4_create_accept_sk(struct sock *sk, struct sctp_association *asoc, bool kern)
{ struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
sk->sk_prot, kern); struct inet_sock *newinet;
/* NOTE(review): truncated by extraction -- the initialization of the new
 * socket (inet fields, options copy, sctp_copy_sock and friends) and the
 * return statement are missing.  "staticstruct" is a fused-keyword
 * extraction artifact.
 */
/* NOTE(review): interior fragment of the address wait-queue timeout
 * handler (the enclosing function header, its locals and locking are not
 * visible in this chunk).  It walks the pending address events, logging
 * each, and skips link-local IPv6 addresses, which are not handled.
 */
list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) {
pr_debug("%s: the first ent in wq:%p is addr:%pISc for cmd:%d at " "entry:%p\n", __func__, &net->sctp.addr_waitq, &addrw->a.sa,
addrw->state, addrw);
#if IS_ENABLED(CONFIG_IPV6) /* Now we send an ASCONF for each association */ /* Note. we currently don't handle link local IPv6 addressees */ if (addrw->a.sa.sa_family == AF_INET6) { struct in6_addr *in6;
if (ipv6_addr_type(&addrw->a.v6.sin6_addr) &
IPV6_ADDR_LINKLOCAL) goto free_next;
/* NOTE(review): truncated by extraction -- the ASCONF dispatch per
 * association, the free_next label and the loop/function epilogue are
 * missing from this chunk.
 */
/* lookup the entry for the same address in the addr_waitq * sctp_addr_wq MUST be locked
*/ staticstruct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct net *net, struct sctp_sockaddr_entry *addr)
{ struct sctp_sockaddr_entry *addrw;
list_for_each_entry(addrw, &net->sctp.addr_waitq, list) { if (addrw->a.sa.sa_family != addr->a.sa.sa_family) continue; if (addrw->a.sa.sa_family == AF_INET) { if (addrw->a.v4.sin_addr.s_addr ==
addr->a.v4.sin_addr.s_addr) return addrw;
} elseif (addrw->a.sa.sa_family == AF_INET6) { if (ipv6_addr_equal(&addrw->a.v6.sin6_addr,
&addr->a.v6.sin6_addr)) return addrw;
}
} return NULL;
}
/* Queue an address add/delete event ('cmd') on the per-namespace address
 * wait queue, under addr_wq_lock.
 */
void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cmd)
{ struct sctp_sockaddr_entry *addrw; unsignedlong timeo_val;
/* first, we check if an opposite message already exist in the queue. * If we found such message, it is removed. * This operation is a bit stupid, but the DHCP client attaches the * new address after a couple of addition and deletion of that address
 */
spin_lock_bh(&net->sctp.addr_wq_lock);
/* Avoid searching the queue or modifying it if there are no consumers, * as it can lead to performance degradation if addresses are modified * en-masse. * * If the queue already contains some events, update it anyway to avoid * ugly races between new sessions and new address events.
 */ if (list_empty(&net->sctp.auto_asconf_splist) &&
list_empty(&net->sctp.addr_waitq)) {
spin_unlock_bh(&net->sctp.addr_wq_lock); return;
}
/* Offsets existing events in addr_wq */
addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { if (addrw->state != cmd) {
pr_debug("%s: offsets existing entry for %d, addr:%pISc " "in wq:%p\n", __func__, addrw->state, &addrw->a.sa,
&net->sctp.addr_waitq);
/* NOTE(review): extraction dropped a chunk here -- upstream removes and
 * frees the offsetting entry, unlocks and returns, and also unlocks and
 * returns in the same-state case; none of that survives in this chunk,
 * so control appears to fall straight through to the "add new" path.
 */
/* OK, we have to add the new address to the wait queue */
addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); if (addrw == NULL) {
spin_unlock_bh(&net->sctp.addr_wq_lock); return;
}
addrw->state = cmd;
list_add_tail(&addrw->list, &net->sctp.addr_waitq);
pr_debug("%s: add new entry for cmd:%d, addr:%pISc in wq:%p\n",
__func__, addrw->state, &addrw->a.sa, &net->sctp.addr_waitq);
/* NOTE(review): also truncated at the end -- the timer (re)arming using
 * 'timeo_val', the unlock and the closing brace are missing.
 * "unsignedlong" above is a fused-keyword extraction artifact.
 */
/* Event handler for inet address addition/deletion events. * The sctp_local_addr_list needs to be protocted by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same * time and thus corrupt the list. * The reader side is protected with RCU.
 */ staticint sctp_inetaddr_event(struct notifier_block *this, unsignedlong ev, void *ptr)
{ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; struct net *net = dev_net(ifa->ifa_dev->dev); int found = 0;
/* NOTE(review): body truncated by extraction -- the NETDEV_UP/NETDEV_DOWN
 * handling that adds/removes the address from local_addr_list and queues
 * the wait-queue event is missing, along with the return and closing
 * brace.  "staticint"/"unsignedlong" are fused-keyword artifacts.
 */
/*
 * Initialize the control inode/socket with a control endpoint data
 * structure.  This endpoint is reserved exclusively for the OOTB processing.
 *
 * Returns 0 on success or the negative error from inet_ctl_sock_create().
 * Defect fixed: the extracted text dropped the initial socket-creation
 * call, leaving 'err' read while uninitialized; restore it so 'err' is
 * always assigned before the IPv6-fallback test.
 */
static int sctp_ctl_sock_init(struct net *net)
{
	int err;
	sa_family_t family = PF_INET;

	/* Prefer an IPv6 control socket when the v6 PF is registered. */
	if (sctp_get_pf_specific(PF_INET6))
		family = PF_INET6;

	err = inet_ctl_sock_create(&net->sctp.ctl_sock, family,
				   SOCK_SEQPACKET, IPPROTO_SCTP, net);

	/* If IPv6 socket could not be created, try the IPv4 socket */
	if (err < 0 && family == PF_INET6)
		err = inet_ctl_sock_create(&net->sctp.ctl_sock, AF_INET,
					   SOCK_SEQPACKET, IPPROTO_SCTP,
					   net);

	if (err < 0) {
		pr_err("Failed to create the SCTP control socket\n");
		return err;
	}
	return 0;
}
/* Get the table of functions for manipulating a particular address
 * family.  Returns NULL for any family other than AF_INET/AF_INET6.
 */
struct sctp_af *sctp_get_af_specific(sa_family_t family)
{
	if (family == AF_INET)
		return sctp_af_v4_specific;
	if (family == AF_INET6)
		return sctp_af_v6_specific;
	return NULL;
}
/* Common code to initialize a AF_INET msg_name. */ staticvoid sctp_inet_msgname(char *msgname, int *addr_len)
{ struct sockaddr_in *sin;
/* NOTE(review): body truncated by extraction -- the code that sets
 * *addr_len and fills in the sockaddr_in family/fields is missing, plus
 * the closing brace.  "staticvoid" is a fused-keyword artifact.
 */
/* Copy the primary address of the peer primary address as the msg_name. */ staticvoid sctp_inet_event_msgname(struct sctp_ulpevent *event, char *msgname, int *addr_len)
{ struct sockaddr_in *sin, *sinfrom;
/* NOTE(review): body truncated by extraction -- the copy from the
 * association's peer primary address into 'msgname' is missing, plus the
 * closing brace.  "staticvoid" is a fused-keyword artifact.
 */
/* Do we support this AF?
 *
 * Returns non-zero only for AF_INET; 'sp' is unused for the v4 PF.
 */
static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp)
{
	/* PF_INET only supports AF_INET addresses. */
	return AF_INET == family;
}
/* Address matching with wildcards allowed. */ staticint sctp_inet_cmp_addr(constunion sctp_addr *addr1, constunion sctp_addr *addr2, struct sctp_sock *opt)
{ /* PF_INET only supports AF_INET addresses. */ if (addr1->sa.sa_family != addr2->sa.sa_family) return 0; if (htonl(INADDR_ANY) == addr1->v4.sin_addr.s_addr ||
htonl(INADDR_ANY) == addr2->v4.sin_addr.s_addr) return 1; if (addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr) return 1;
return 0;
}
/* Verify that provided sockaddr looks bindable. Common verification has
 * already been taken care of.
 *
 * Delegates the v4-specific availability check to sctp_v4_available().
 */
static int sctp_inet_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
{
	return sctp_v4_available(addr, opt);
}
/* Verify that sockaddr looks sendable. Common verification has already
 * been taken care of.
 *
 * For the v4 PF there is nothing further to check, so always succeed.
 */
static int sctp_inet_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
{
	return 1;
}
/* Fill in Supported Address Type information for INIT and INIT-ACK
 * chunks. Returns number of addresses supported.
 *
 * The v4 PF supports exactly one address type, so one entry is written
 * into 'types' and 1 is returned.
 */
static int sctp_inet_supported_addrs(const struct sctp_sock *opt,
				     __be16 *types)
{
	types[0] = SCTP_PARAM_IPV4_ADDRESS;
	return 1;
}
/* NOTE(review): interior fragment of a per-namespace init function
 * (presumably sctp_defaults_init -- its signature, earlier body, error
 * labels and return are not visible in this chunk).  It registers
 * sysctls, MIBs and procfs, then initializes the local address list and
 * the address wait queue.
 */
status = sctp_sysctl_net_register(net); if (status) goto err_sysctl_register;
/* Allocate and initialise sctp mibs. */
status = init_sctp_mibs(net); if (status) goto err_init_mibs;
#ifdef CONFIG_PROC_FS /* Initialize proc fs directory. */
status = sctp_proc_init(net); if (status) goto err_init_proc; #endif
sctp_dbg_objcnt_init(net);
/* Initialize the local address list. */
INIT_LIST_HEAD(&net->sctp.local_addr_list);
spin_lock_init(&net->sctp.local_addr_lock);
sctp_get_local_addr_list(net);
/* Initialize the address event list */
INIT_LIST_HEAD(&net->sctp.addr_waitq);
INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
spin_lock_init(&net->sctp.addr_wq_lock);
net->sctp.addr_wq_timer.expires = 0;
timer_setup(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
/* NOTE(review): the function's successful return and the
 * err_* unwind labels referenced by the gotos above were lost in
 * extraction.
 */
/* Per-namespace setup of the SCTP control socket.
 *
 * Returns 0 on success or the error from sctp_ctl_sock_init(), which is
 * logged here and propagated to the pernet-ops machinery.
 */
static int __net_init sctp_ctrlsock_init(struct net *net)
{
	int status;

	/* Initialize the control inode/socket for handling OOTB packets. */
	status = sctp_ctl_sock_init(net);
	if (status)
		pr_err("Failed to initialize the SCTP control sock\n");

	return status;
}
/* Per-namespace teardown of the SCTP control socket. */
static void __net_exit sctp_ctrlsock_exit(struct net *net)
{
	/* Free the control endpoint. */
	inet_ctl_sock_destroy(net->sctp.ctl_sock);
}
/* Initialize the universe into something sensible. */ static __init int sctp_init(void)
{ unsignedlong nr_pages = totalram_pages(); unsignedlong limit; unsignedlong goal; int max_entry_order; int num_entries; int max_share; int status; int order; int i;
/* NOTE(review): "unsignedlong" is a fused-keyword extraction artifact, and
 * the extraction also dropped earlier body lines (upstream initializes
 * the sysctl_sctp_mem[] pressure thresholds before 'limit' is derived
 * from sysctl_sctp_mem[1] below).
 */
/* Set per-socket limits to no more than 1/128 the pressure threshold*/
limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);
sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */
sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
/* NOTE(review): the matching sysctl_sctp_wmem[] assignments appear to
 * have been dropped here as well.
 */
/* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. * Though not identical. Start by getting a goal size
 */ if (nr_pages >= (128 * 1024))
goal = nr_pages >> (22 - PAGE_SHIFT); else
goal = nr_pages >> (24 - PAGE_SHIFT);
/* Then compute the page order for said goal */
order = get_order(goal);
/* Now compute the required page order for the maximum sized table we * want to create
 */
max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES * sizeof(struct sctp_bind_hashbucket));
/* Limit the page order by that maximum hash table size */
order = min(order, max_entry_order);
/* Allocate and initialize the endpoint hash table. */
sctp_ep_hashsize = 64;
sctp_ep_hashtable =
kmalloc_array(64, sizeof(struct sctp_hashbucket), GFP_KERNEL); if (!sctp_ep_hashtable) {
pr_err("Failed endpoint_hash alloc\n");
status = -ENOMEM; goto err_ehash_alloc;
} for (i = 0; i < sctp_ep_hashsize; i++) {
rwlock_init(&sctp_ep_hashtable[i].lock);
INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
}
/* Allocate and initialize the SCTP port hash table. * Note that order is initalized to start at the max sized * table we want to support. If we can't get that many pages * reduce the order and try again
 */ do {
sctp_port_hashtable = (struct sctp_bind_hashbucket *)
__get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
} while (!sctp_port_hashtable && --order > 0);
if (!sctp_port_hashtable) {
pr_err("Failed bind hash alloc\n");
status = -ENOMEM; goto err_bhash_alloc;
}
/* Now compute the number of entries that will fit in the * port hash space we allocated
 */
num_entries = (1UL << order) * PAGE_SIZE / sizeof(struct sctp_bind_hashbucket);
/* And finish by rounding it down to the nearest power of two. * This wastes some memory of course, but it's needed because * the hash function operates based on the assumption that * the number of entries is a power of two.
 */
sctp_port_hashsize = rounddown_pow_of_two(num_entries);
for (i = 0; i < sctp_port_hashsize; i++) {
spin_lock_init(&sctp_port_hashtable[i].lock);
INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
}
status = sctp_transport_hashtable_init(); if (status) goto err_thash_alloc;
/* NOTE(review): truncated -- the remainder of sctp_init (protosw/pernet
 * registration, success return and the err_* unwind labels targeted by
 * the gotos above) is missing from this chunk.
 */
/* Exit handler for the SCTP protocol. */ static __exitvoid sctp_exit(void)
{ /* BUG. This should probably do something useful like clean * up all the remaining associations and all that memory.
 */
/* Unregister with inet6/inet layers. */
sctp_v6_del_protocol();
sctp_v4_del_protocol();
/* NOTE(review): truncated by extraction -- the remaining teardown
 * (pernet unregistration, protosw removal, hash table frees) and the
 * closing brace are missing.  "__exitvoid" is a fused-keyword artifact.
 */
/* NOTE(review): extraction artifact -- the following trailing text is
 * German website boilerplate, not SCTP source.  Translated: "The
 * information on this web page has been carefully compiled to the best of
 * our knowledge; however neither completeness, correctness nor quality of
 * the provided information is guaranteed.  Note: the color syntax
 * highlighting and the measurement are still experimental."  It should be
 * deleted from the file.
 */