// SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * The IP to API glue. * * Authors: see ip.c * * Fixes: * Many : Split from ip.c , see ip.c for history. * Martin Mares : TOS setting fixed. * Alan Cox : Fixed a couple of oopses in Martin's * TOS tweaks. * Mike McLagan : Routing by source
*/
/* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind.
*/
ports = skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_ports), &_ports); if (!ports) return;
int ip_ra_control(struct sock *sk, unsignedchar on, void (*destructor)(struct sock *))
{ struct ip_ra_chain *ra, *new_ra; struct ip_ra_chain __rcu **rap; struct net *net = sock_net(sk);
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) return -EINVAL;
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; if (on && !new_ra) return -ENOMEM;
mutex_lock(&net->ipv4.ra_mutex); for (rap = &net->ipv4.ra_chain;
(ra = rcu_dereference_protected(*rap,
lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
rap = &ra->next) { if (ra->sk == sk) { if (on) {
mutex_unlock(&net->ipv4.ra_mutex);
kfree(new_ra); return -EADDRINUSE;
} /* dont let ip_call_ra_chain() use sk again */
ra->sk = NULL;
RCU_INIT_POINTER(*rap, ra->next);
mutex_unlock(&net->ipv4.ra_mutex);
if (ra->destructor)
ra->destructor(sk); /* * Delay sock_put(sk) and kfree(ra) after one rcu grace * period. This guarantee ip_call_ra_chain() dont need * to mess with socket refcounts.
*/
ra->saved_sk = sk;
call_rcu(&ra->rcu, ip_ra_destroy_rcu); return 0;
}
} if (!new_ra) {
mutex_unlock(&net->ipv4.ra_mutex); return -ENOBUFS;
}
new_ra->sk = sk;
new_ra->destructor = destructor;
if (sock_queue_err_skb(sk, skb))
kfree_skb(skb);
}
/* For some errors we have valid addr_offset even with zero payload and
 * zero port. Also, addr_offset should be supported if port is set.
 *
 * @serr: extended error control block of the queued error skb.
 *
 * Returns true when the error origin is SO_EE_ORIGIN_ICMP or
 * SO_EE_ORIGIN_LOCAL, or when serr->port is non-zero; false otherwise.
 */
static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
{
	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
}
/* IPv4 supports cmsg on all ICMP errors and some timestamps * * Timestamp code paths do not initialize the fields expected by cmsg: * the PKTINFO fields in skb->cb[]. Fill those in here.
*/ staticbool ipv4_datagram_support_cmsg(conststruct sock *sk, struct sk_buff *skb, int ee_origin)
{ struct in_pktinfo *info;
if (ee_origin == SO_EE_ORIGIN_ICMP) returntrue;
if (ee_origin == SO_EE_ORIGIN_LOCAL) returnfalse;
/* Support IP_PKTINFO on tstamp packets if requested, to correlate * timestamp with egress dev. Not possible for packets without iif * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
info = PKTINFO_SKB_CB(skb); if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex) returnfalse;
/*
 *	Socket option code for IP. This is the end of the line after any
 *	TCP,UDP etc options on an IP socket.
 */

/* Tell whether @optname is one of the multicast membership / source
 * filter options enumerated below. do_ip_setsockopt() stores the result
 * in needs_rtnl and, on its e_inval exit path, calls rtnl_unlock() only
 * when it is true.
 *
 * Returns true for the listed options and false for every other value.
 */
static bool setsockopt_needs_rtnl(int optname)
{
	switch (optname) {
	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_DROP_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_MSFILTER:
	case IP_UNBLOCK_SOURCE:
	case MCAST_BLOCK_SOURCE:
	case MCAST_MSFILTER:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_UNBLOCK_SOURCE:
		return true;
	}
	return false;
}
staticint set_mcast_msfilter(struct sock *sk, int ifindex, int numsrc, int fmode, struct sockaddr_storage *group, struct sockaddr_storage *list)
{ struct ip_msfilter *msf; struct sockaddr_in *psin; int err, i;
msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL); if (!msf) return -ENOBUFS;
psin = (struct sockaddr_in *)group; if (psin->sin_family != AF_INET) goto Eaddrnotavail;
msf->imsf_multiaddr = psin->sin_addr.s_addr;
msf->imsf_interface = 0;
msf->imsf_fmode = fmode;
msf->imsf_numsrc = numsrc; for (i = 0; i < numsrc; ++i) {
psin = (struct sockaddr_in *)&list[i];
int do_ip_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsignedint optlen)
{ struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); int val = 0, err, retv; bool needs_rtnl = setsockopt_needs_rtnl(optname);
switch (optname) { case IP_PKTINFO: case IP_RECVTTL: case IP_RECVOPTS: case IP_RECVTOS: case IP_RETOPTS: case IP_TOS: case IP_TTL: case IP_HDRINCL: case IP_MTU_DISCOVER: case IP_RECVERR: case IP_ROUTER_ALERT: case IP_FREEBIND: case IP_PASSSEC: case IP_TRANSPARENT: case IP_MINTTL: case IP_NODEFRAG: case IP_BIND_ADDRESS_NO_PORT: case IP_UNICAST_IF: case IP_MULTICAST_TTL: case IP_MULTICAST_ALL: case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: case IP_CHECKSUM: case IP_RECVFRAGSIZE: case IP_RECVERR_RFC4884: case IP_LOCAL_PORT_RANGE: if (optlen >= sizeof(int)) { if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT;
} elseif (optlen >= sizeof(char)) { unsignedchar ucval;
if (copy_from_sockptr(&ucval, optval, sizeof(ucval))) return -EFAULT;
val = (int) ucval;
}
}
/* If optlen==0, it is equivalent to val == 0 */
if (optname == IP_ROUTER_ALERT) {
retv = ip_ra_control(sk, val ? 1 : 0, NULL); if (retv == 0)
inet_assign_bit(RTALERT, sk, val); return retv;
} if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen);
/* Handle options that can be set without locking the socket. */ switch (optname) { case IP_PKTINFO:
inet_assign_bit(PKTINFO, sk, val); return 0; case IP_RECVTTL:
inet_assign_bit(TTL, sk, val); return 0; case IP_RECVTOS:
inet_assign_bit(TOS, sk, val); return 0; case IP_RECVOPTS:
inet_assign_bit(RECVOPTS, sk, val); return 0; case IP_RETOPTS:
inet_assign_bit(RETOPTS, sk, val); return 0; case IP_PASSSEC:
inet_assign_bit(PASSSEC, sk, val); return 0; case IP_RECVORIGDSTADDR:
inet_assign_bit(ORIGDSTADDR, sk, val); return 0; case IP_RECVFRAGSIZE: if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) return -EINVAL;
inet_assign_bit(RECVFRAGSIZE, sk, val); return 0; case IP_RECVERR:
inet_assign_bit(RECVERR, sk, val); if (!val)
skb_errqueue_purge(&sk->sk_error_queue); return 0; case IP_RECVERR_RFC4884: if (val < 0 || val > 1) return -EINVAL;
inet_assign_bit(RECVERR_RFC4884, sk, val); return 0; case IP_FREEBIND: if (optlen < 1) return -EINVAL;
inet_assign_bit(FREEBIND, sk, val); return 0; case IP_HDRINCL: if (sk->sk_type != SOCK_RAW) return -ENOPROTOOPT;
inet_assign_bit(HDRINCL, sk, val); return 0; case IP_MULTICAST_LOOP: if (optlen < 1) return -EINVAL;
inet_assign_bit(MC_LOOP, sk, val); return 0; case IP_MULTICAST_ALL: if (optlen < 1) return -EINVAL; if (val != 0 && val != 1) return -EINVAL;
inet_assign_bit(MC_ALL, sk, val); return 0; case IP_TRANSPARENT: if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (optlen < 1) return -EINVAL;
inet_assign_bit(TRANSPARENT, sk, val); return 0; case IP_NODEFRAG: if (sk->sk_type != SOCK_RAW) return -ENOPROTOOPT;
inet_assign_bit(NODEFRAG, sk, val); return 0; case IP_BIND_ADDRESS_NO_PORT:
inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val); return 0; case IP_TTL: if (optlen < 1) return -EINVAL; if (val != -1 && (val < 1 || val > 255)) return -EINVAL;
WRITE_ONCE(inet->uc_ttl, val); return 0; case IP_MINTTL: if (optlen < 1) return -EINVAL; if (val < 0 || val > 255) return -EINVAL;
if (val)
static_branch_enable(&ip4_min_ttl);
WRITE_ONCE(inet->min_ttl, val); return 0; case IP_MULTICAST_TTL: if (sk->sk_type == SOCK_STREAM) return -EINVAL; if (optlen < 1) return -EINVAL; if (val == -1)
val = 1; if (val < 0 || val > 255) return -EINVAL;
WRITE_ONCE(inet->mc_ttl, val); return 0; case IP_MTU_DISCOVER: return ip_sock_set_mtu_discover(sk, val); case IP_TOS: /* This sets both TOS and Precedence */
ip_sock_set_tos(sk, val); return 0; case IP_LOCAL_PORT_RANGE:
{
u16 lo = val;
u16 hi = val >> 16;
if (optlen != sizeof(u32)) return -EINVAL; if (lo != 0 && hi != 0 && lo > hi) return -EINVAL;
e_inval:
sockopt_release_sock(sk); if (needs_rtnl)
rtnl_unlock(); return -EINVAL;
}
/** * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable.
*/ void ipv4_pktinfo_prepare(conststruct sock *sk, struct sk_buff *skb, bool drop_dst)
{ struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); bool prepare = inet_test_bit(PKTINFO, sk) ||
ipv6_sk_rxinfo(sk);
if (prepare && skb_rtable(skb)) { /* skb->cb is overloaded: prior to this point it is IP{6}CB * which has interface index (iif) as the first member of the * underlying inet{6}_skb_parm struct. This code then overlays * PKTINFO_SKB_CB and in_pktinfo also has iif as the first * element so the iif is picked up from the prior IPCB. If iif * is the loopback interface, then return the sending interface * (e.g., process binds socket to eth0 for Tx which is * redirected to loopback in the rtable/dst).
*/ struct rtable *rt = skb_rtable(skb); bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
err = ip_mc_gsfget(sk, &gf, optval,
offsetof(struct compat_group_filter, gf_slist_flex)); if (err) return err; if (gf.gf_numsrc < num)
num = gf.gf_numsrc;
len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); if (copy_to_sockptr(optlen, &len, sizeof(int)) ||
copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode),
&gf.gf_fmode, sizeof(gf.gf_fmode)) ||
copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc),
&gf.gf_numsrc, sizeof(gf.gf_numsrc))) return -EFAULT; return 0;
}
int do_ip_getsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, sockptr_t optlen)
{ struct inet_sock *inet = inet_sk(sk); bool needs_rtnl = getsockopt_needs_rtnl(optname); int val, err = 0; int len;
if (level != SOL_IP) return -EOPNOTSUPP;
if (ip_mroute_opt(optname)) return ip_mroute_getsockopt(sk, optname, optval, optlen);
if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; if (len < 0) return -EINVAL;
/* Handle options that can be read without locking the socket. */ switch (optname) { case IP_PKTINFO:
val = inet_test_bit(PKTINFO, sk); goto copyval; case IP_RECVTTL:
val = inet_test_bit(TTL, sk); goto copyval; case IP_RECVTOS:
val = inet_test_bit(TOS, sk); goto copyval; case IP_RECVOPTS:
val = inet_test_bit(RECVOPTS, sk); goto copyval; case IP_RETOPTS:
val = inet_test_bit(RETOPTS, sk); goto copyval; case IP_PASSSEC:
val = inet_test_bit(PASSSEC, sk); goto copyval; case IP_RECVORIGDSTADDR:
val = inet_test_bit(ORIGDSTADDR, sk); goto copyval; case IP_CHECKSUM:
val = inet_test_bit(CHECKSUM, sk); goto copyval; case IP_RECVFRAGSIZE:
val = inet_test_bit(RECVFRAGSIZE, sk); goto copyval; case IP_RECVERR:
val = inet_test_bit(RECVERR, sk); goto copyval; case IP_RECVERR_RFC4884:
val = inet_test_bit(RECVERR_RFC4884, sk); goto copyval; case IP_FREEBIND:
val = inet_test_bit(FREEBIND, sk); goto copyval; case IP_HDRINCL:
val = inet_test_bit(HDRINCL, sk); goto copyval; case IP_MULTICAST_LOOP:
val = inet_test_bit(MC_LOOP, sk); goto copyval; case IP_MULTICAST_ALL:
val = inet_test_bit(MC_ALL, sk); goto copyval; case IP_TRANSPARENT:
val = inet_test_bit(TRANSPARENT, sk); goto copyval; case IP_NODEFRAG:
val = inet_test_bit(NODEFRAG, sk); goto copyval; case IP_BIND_ADDRESS_NO_PORT:
val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); goto copyval; case IP_ROUTER_ALERT:
val = inet_test_bit(RTALERT, sk); goto copyval; case IP_TTL:
val = READ_ONCE(inet->uc_ttl); if (val < 0)
val = READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_default_ttl); goto copyval; case IP_MINTTL:
val = READ_ONCE(inet->min_ttl); goto copyval; case IP_MULTICAST_TTL:
val = READ_ONCE(inet->mc_ttl); goto copyval; case IP_MTU_DISCOVER:
val = READ_ONCE(inet->pmtudisc); goto copyval; case IP_TOS:
val = READ_ONCE(inet->tos); goto copyval; case IP_OPTIONS:
{ unsignedchar optbuf[sizeof(struct ip_options)+40]; struct ip_options *opt = (struct ip_options *)optbuf; struct ip_options_rcu *inet_opt;
#ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
!ip_mroute_opt(optname)) { int len;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.