/* * Changes: * * Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14 * Add TUNSETLINK ioctl to set the link encapsulation * * Mark Smith <markzzzsmith@yahoo.com.au> * Use eth_random_addr() for tap MAC address. * * Harald Roelle <harald.roelle@ifi.lmu.de> 2004/04/20 * Fixes in packet dropping, queue length setting and queue wakeup. * Increased default tx queue length. * Added ethtool API. * Minor cleanups * * Daniel Podlejski <underley@underley.eu.org> * Modifications for 2.3.99-pre5 kernel.
*/
#define FLT_EXACT_COUNT 8 struct tap_filter { unsignedint count; /* Number of addrs. Zero means disabled */
u32 mask[2]; /* Mask of the hashed addrs */ unsignedchar addr[FLT_EXACT_COUNT][ETH_ALEN];
};
/* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal
 * to max number of VCPUs in guest. */
#define MAX_TAP_QUEUES 256

/* Upper bound on the number of flow entries created per device. */
#define MAX_TAP_FLOWS  4096

/* Flow expiry interval, expressed in jiffies (3 * HZ). */
#define TUN_FLOW_EXPIRE (3 * HZ)
/* A tun_file connects an open character device to a tuntap netdevice. It
 * also contains all socket related structures (except sock_fprog and tap_filter)
 * to serve as one transmit queue for tuntap device. The sock_fprog and
 * tap_filter were kept in tun_struct since they were used for filtering for the
 * netdevice not for a specific queue (at least I didn't see the requirement for
 * this).
 *
 * RCU usage:
 * The tun_file and tun_struct are loosely coupled, the pointer from one to the
 * other can only be read while rcu_read_lock or rtnl_lock is held.
 */
struct tun_file {
	struct sock sk;
	struct socket socket;
	struct tun_struct __rcu *tun;
	struct fasync_struct *fasync;
	/* only used for fasync */
	unsigned int flags;
	/* queue_index and ifindex share the same storage. */
	union {
		u16 queue_index;
		unsigned int ifindex;
	};
	struct napi_struct napi;
	bool napi_enabled;
	bool napi_frags_enabled;
	struct mutex napi_mutex;	/* Protects access to the above napi */
	struct list_head next;
	struct tun_struct *detached;
	struct ptr_ring tx_ring;
	struct xdp_rxq_info xdp_rxq;
};
/*
 * Pairs a page pointer with an integer counter.
 * NOTE(review): what @count tallies is not visible in this part of the
 * file - confirm against the users of struct tun_page.
 */
struct tun_page {
	struct page *page;	/* the page being tracked */
	int count;		/* counter associated with @page */
};
/* Since the socket was moved to tun_file, to preserve the behavior of a persistent
 * device, the socket filter, sndbuf and vnet header size are restored when the
 * file is attached to a persistent device.
*/ struct tun_struct { struct tun_file __rcu *tfiles[MAX_TAP_QUEUES]; unsignedint numqueues; unsignedint flags;
kuid_t owner;
kgid_t group;
e = tun_flow_find(head, rxhash); if (likely(e)) { /* TODO: keep queueing to old queue until it's empty? */ if (READ_ONCE(e->queue_index) != queue_index)
WRITE_ONCE(e->queue_index, queue_index); if (e->updated != jiffies)
e->updated = jiffies;
sock_rps_record_flow_hash(e->rps_rxhash);
} else {
spin_lock_bh(&tun->lock); if (!tun_flow_find(head, rxhash) &&
tun->flow_count < MAX_TAP_FLOWS)
tun_flow_create(tun, head, rxhash, queue_index);
if (!timer_pending(&tun->flow_gc_timer))
mod_timer(&tun->flow_gc_timer,
round_jiffies_up(jiffies + delay));
spin_unlock_bh(&tun->lock);
}
rcu_read_unlock();
}
/* Save the hash received in the stack receive path and update the
 * flow_hash table accordingly.
 *
 * @e:    flow entry whose rps_rxhash field is updated
 * @hash: hash value to record
 *
 * The store is skipped when the entry already holds @hash.
 */
static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
{
	if (unlikely(e->rps_rxhash != hash))
		e->rps_rxhash = hash;
}
/* We try to identify a flow through its rxhash. The reason that
 * we do not check the rxq number is that some cards (e.g. 82599) choose
 * the rxq based on the txq where the last packet of the flow came from. As
 * the userspace application moves between processors, we may get a
 * different rxq number here.
*/ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
{ struct tun_flow_entry *e;
u32 txq, numqueues;
rcu_read_lock(); if (rcu_dereference(tun->steering_prog))
ret = tun_ebpf_select_queue(tun, skb); else
ret = tun_automq_select_queue(tun, skb);
rcu_read_unlock();
rtnl_lock();
tun = rtnl_dereference(tfile->tun);
dev = tun ? tun->dev : NULL;
__tun_detach(tfile, clean); if (dev)
netdev_state_change(dev);
rtnl_unlock();
if (clean)
sock_put(&tfile->sk);
}
staticvoid tun_detach_all(struct net_device *dev)
{ struct tun_struct *tun = netdev_priv(dev); struct tun_file *tfile, *tmp; int i, n = tun->numqueues;
/* The filter is updated without holding any locks. Which is * perfectly safe. We disable it first and in the worst
* case we'll accept a few undesired packets. */
filter->count = 0;
wmb();
/* Use first set of addresses as an exact filter */ for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
memcpy(filter->addr[n], addr[n].u, ETH_ALEN);
nexact = n;
/* Remaining multicast addresses are hashed,
* unicast will leave the filter disabled. */
memset(filter->mask, 0, sizeof(filter->mask)); for (; n < uf.count; n++) { if (!is_multicast_ether_addr(addr[n].u)) {
err = 0; /* no filter */ goto free_addr;
}
addr_hash_set(filter->mask, addr[n].u);
}
/* For ALLMULTI just set the mask to all ones.
* This overrides the mask populated above. */ if ((uf.flags & TUN_FLT_ALLMULTI))
memset(filter->mask, ~0, sizeof(filter->mask));
/* Now enable the filter */
wmb();
filter->count = nexact;
/* Return the number of exact filters */
err = nexact;
free_addr:
kfree(addr); return err;
}
/* Returns: 0 - drop, !=0 - accept */ staticint run_filter(struct tap_filter *filter, conststruct sk_buff *skb)
{ /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect
* at this point. */ struct ethhdr *eh = (struct ethhdr *) skb->data; int i;
/* Exact match */ for (i = 0; i < filter->count; i++) if (ether_addr_equal(eh->h_dest, filter->addr[i])) return 1;
/* Inexact match (multicast only) */ if (is_multicast_ether_addr(eh->h_dest)) return addr_hash_test(filter->mask, eh->h_dest);
return 0;
}
/* * Checks whether the packet is accepted or not. * Returns: 0 - drop, !=0 - accept
*/ staticint check_filter(struct tap_filter *filter, conststruct sk_buff *skb)
{ if (!filter->count) return 1;
/*
 * Record the RPS hash of an outgoing packet in the flow table.
 *
 * Only does work when CONFIG_RPS is set, the device has exactly one queue
 * and the rps_needed static branch is enabled. In that case the symmetric
 * hash of @skb is looked up in tun->flows and, if an entry exists, the hash
 * is stored in it via tun_flow_save_rps_rxhash().
 */
static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
{
#ifdef CONFIG_RPS
	if (tun->numqueues == 1 && static_branch_unlikely(&rps_needed)) {
		/* Select queue was not called for the skbuff, so we extract the
		 * RPS hash and save it into the flow_table here.
		 */
		struct tun_flow_entry *e;
		__u32 rxhash;

		rxhash = __skb_get_hash_symmetric(skb);
		e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], rxhash);
		if (e)
			tun_flow_save_rps_rxhash(e, rxhash);
	}
#endif
}
/* Drop if the filter does not like it. * This is a noop if the filter is disabled.
* Filter can be enabled only for the TAP devices. */ if (!check_filter(&tun->txflt, skb)) {
drop_reason = SKB_DROP_REASON_TAP_TXFILTER; goto drop;
}
if (tfile->socket.sk->sk_filter &&
sk_filter_reason(tfile->socket.sk, skb, &drop_reason)) goto drop;
len = run_ebpf_filter(tun, skb, len); if (len == 0) {
drop_reason = SKB_DROP_REASON_TAP_FILTER; goto drop;
}
if (pskb_trim(skb, len)) {
drop_reason = SKB_DROP_REASON_NOMEM; goto drop;
}
if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; goto drop;
}
skb_tx_timestamp(skb);
/* Orphan the skb - required as we might hang on to it * for indefinite time.
*/
skb_orphan(skb);
nf_reset_ct(skb);
if (ptr_ring_produce(&tfile->tx_ring, skb)) {
drop_reason = SKB_DROP_REASON_FULL_RING; goto drop;
}
/* dev->lltx requires to do our own update of trans_start */
queue = netdev_get_tx_queue(dev, txq);
txq_trans_cond_update(queue);
/* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC)
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
/* Multicast-list callback; intentionally does nothing. */
static void tun_net_mclist(struct net_device *dev)
{
	/*
	 * This callback is supposed to deal with mc filter in
	 * _rx_ path and has nothing to do with the _tx_ path.
	 * In rx path we always accept everything userspace gives us.
	 */
}
/*
 * Wake up whoever is waiting to read from @tfile.
 *
 * Sends SIGIO/POLL_IN through kill_fasync() when TUN_FASYNC is set in the
 * file's flags, then invokes the socket's sk_data_ready callback.
 */
static void __tun_xdp_flush_tfile(struct tun_file *tfile)
{
	/* Notify and wake up reader process */
	if (tfile->flags & TUN_FASYNC)
		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
	tfile->socket.sk->sk_data_ready(tfile->socket.sk);
}
staticint tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags)
{ struct tun_struct *tun = netdev_priv(dev); struct tun_file *tfile;
u32 numqueues; int nxmit = 0; int i;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL;
rcu_read_lock();
resample:
numqueues = READ_ONCE(tun->numqueues); if (!numqueues) {
rcu_read_unlock(); return -ENXIO; /* Caller will free/return all frames */
}
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]); if (unlikely(!tfile)) goto resample;
spin_lock(&tfile->tx_ring.producer_lock); for (i = 0; i < n; i++) { struct xdp_frame *xdp = frames[i]; /* Encode the XDP flag into lowest bit for consumer to differ * XDP buffer from sk_buff.
*/ void *frame = tun_xdp_to_ptr(xdp);
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
dev_core_stats_tx_dropped_inc(dev); break;
}
nxmit++;
}
spin_unlock(&tfile->tx_ring.producer_lock);
if (flags & XDP_XMIT_FLUSH)
__tun_xdp_flush_tfile(tfile);
if (!ptr_ring_empty(&tfile->tx_ring))
mask |= EPOLLIN | EPOLLRDNORM;
/* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to * guarantee EPOLLOUT to be raised by either here or * tun_sock_write_space(). Then process could get notification * after it writes to a down device and meets -EIO.
*/ if (tun_sock_writeable(tun, tfile) ||
(!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
tun_sock_writeable(tun, tfile)))
mask |= EPOLLOUT | EPOLLWRNORM;
if (tun->dev->reg_state != NETREG_REGISTERED)
mask = EPOLLERR;
tun_put(tun); return mask;
}
staticstruct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
size_t len, conststruct iov_iter *it)
{ struct sk_buff *skb;
size_t linear; int err; int i;
if (it->nr_segs > MAX_SKB_FRAGS + 1 ||
len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN)) return ERR_PTR(-EMSGSIZE);
local_bh_disable();
skb = napi_get_frags(&tfile->napi);
local_bh_enable(); if (!skb) return ERR_PTR(-ENOMEM);
linear = iov_iter_single_seg_count(it);
err = __skb_grow(skb, linear); if (err) goto free;
return skb;
free: /* frees skb and all frags allocated with napi_alloc_frag() */
napi_free_frags(&tfile->napi); return ERR_PTR(err);
}
/* prepad is the amount to reserve at front. len is length after that.
* linear is a hint as to how much to copy (usually headers). */ staticstruct sk_buff *tun_alloc_skb(struct tun_file *tfile,
size_t prepad, size_t len,
size_t linear, int noblock)
{ struct sock *sk = tfile->socket.sk; struct sk_buff *skb; int err;
/* Under a page? Don't bother with paged skb. */ if (prepad + len < PAGE_SIZE)
linear = len;
if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
&err, PAGE_ALLOC_COSTLY_ORDER); if (!skb) return ERR_PTR(err);
skb_reserve(skb, prepad);
skb_put(skb, linear);
skb->data_len = len - linear;
skb->len += len - linear;
/* There's a small window that XDP may be set after the check * of xdp_prog above, this should be rare and for simplicity * we do XDP on skb in case the headroom is not enough.
*/ if (hdr->gso_type || !xdp_prog) {
*skb_xdp = 1; return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
pad, metasize);
}
if (err == XDP_REDIRECT)
xdp_do_flush(); if (err != XDP_PASS) goto out;
pad = xdp.data - xdp.data_hard_start;
len = xdp.data_end - xdp.data;
/* It is known that the xdp_buff was prepared with metadata * support, so the metasize will never be negative.
*/
metasize = xdp.data - xdp.data_meta;
}
bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
/* Get packet from user space buffer */ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, void *msg_control, struct iov_iter *from, int noblock, bool more)
{ struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb;
size_t total_len = iov_iter_count(from);
size_t len = total_len, align = tun->align, linear; struct virtio_net_hdr_v1_hash_tunnel hdr; struct virtio_net_hdr *gso; int good_linear; int copylen; int hdr_len = 0; bool zerocopy = false; int err;
u32 rxhash = 0; int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tfile); enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
netdev_features_t features = 0;
/*
 * Keep it easy and always zero the whole buffer, even if the
 * tunnel-related field will be touched only when the feature
 * is enabled and the hdr size is compatible.
*/
memset(&hdr, 0, sizeof(hdr));
gso = (struct virtio_net_hdr *)&hdr;
if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) return -EINVAL;
len -= sizeof(pi);
if (!copy_from_iter_full(&pi, sizeof(pi), from)) return -EFAULT;
}
if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
features = tun_vnet_hdr_guest_features(vnet_hdr_sz);
hdr_len = __tun_vnet_hdr_get(vnet_hdr_sz, tun->flags,
features, from, gso); if (hdr_len < 0) return hdr_len;
/* There are 256 bytes to be copied in skb, so there is * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace.
*/
copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear);
linear = copylen;
iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
zerocopy = true;
}
if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) { /* For the packet that is not easy to be processed * (e.g gso or jumbo packet), we will do it at after * skb was created with generic XDP routine.
*/
skb = tun_build_skb(tun, tfile, from, gso, len, &skb_xdp);
err = PTR_ERR_OR_ZERO(skb); if (err) goto drop; if (!skb) return total_len;
} else { if (!zerocopy) {
copylen = len;
linear = min(hdr_len, good_linear);
}
if (frags) {
mutex_lock(&tfile->napi_mutex);
skb = tun_napi_alloc_frags(tfile, copylen, from); /* tun_napi_alloc_frags() enforces a layout for the skb. * If zerocopy is enabled, then this layout will be * overwritten by zerocopy_sg_from_iter().
*/
zerocopy = false;
} else { if (!linear)
linear = min_t(size_t, good_linear, copylen);
/* Compute the costly rx hash only if needed for flow updates. * We may get a very small possibility of OOO during switching, not * worth to optimize.
*/ if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
!tfile->detached)
rxhash = __skb_get_hash_symmetric(skb);
/* Put packet to the user space buffer */ static ssize_t tun_put_user(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *skb, struct iov_iter *iter)
{ struct tun_pi pi = { 0, skb->protocol };
ssize_t total; int vlan_offset = 0; int vlan_hlen = 0; int vnet_hdr_sz = 0; int ret;
if (skb_vlan_tag_present(skb))
vlan_hlen = VLAN_HLEN;
if (tun->flags & IFF_VNET_HDR)
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
total = skb->len + vlan_hlen + vnet_hdr_sz;
if (!(tun->flags & IFF_NO_PI)) { if (iov_iter_count(iter) < sizeof(pi)) return -EINVAL;
total += sizeof(pi); if (iov_iter_count(iter) < total) { /* Packet will be striped */
pi.flags |= TUN_PKT_STRIP;
}
if (copy_to_iter(&pi, sizeof(pi), iter) != sizeof(pi)) return -EFAULT;
}
if (vnet_hdr_sz) { struct virtio_net_hdr_v1_hash_tunnel hdr; struct virtio_net_hdr *gso;
ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb,
&hdr); if (ret) return ret;
/* * Drop the packet if the configured header size is too small * WRT the enabled offloads.
*/
gso = (struct virtio_net_hdr *)&hdr;
ret = __tun_vnet_hdr_put(vnet_hdr_sz, tun->dev->features,
iter, gso); if (ret) return ret;
}
/* Trivial set of netlink ops to allow deleting tun or tap
 * device with netlink.
 */

/*
 * Validation hook that rejects every request: it records an explanatory
 * message in @extack and returns -EOPNOTSUPP. @tb and @data are not
 * inspected.
 */
static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
			struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG(extack,
		       "tun/tap creation via rtnetlink is not supported.");
	return -EOPNOTSUPP;
}
/* The externally provided xdp_buff may have no metadata support, which * is marked by xdp->data_meta being xdp->data + 1. This will lead to a * metasize of -1 and is the reason why the condition checks for > 0.
*/
metasize = xdp->data - xdp->data_meta; if (metasize > 0)
skb_metadata_set(skb, metasize);
features = tun_vnet_hdr_guest_features(READ_ONCE(tun->vnet_hdr_sz));
tnl_hdr = (struct virtio_net_hdr_v1_hash_tunnel *)gso; if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, tnl_hdr)) {
atomic_long_inc(&tun->rx_frame_errors);
kfree_skb(skb);
ret = -EINVAL; goto out;
}
if (tun->flags & IFF_MULTI_QUEUE &&
(tun->numqueues + tun->numdisabled > 1)) { /* One or more queue has already been attached, no need * to initialize the device again.
*/
netdev_state_change(dev); return 0;
}
if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM;
err = security_tun_dev_create(); if (err < 0) return err;
/* Set dev type */ if (ifr->ifr_flags & IFF_TUN) { /* TUN device */
flags |= IFF_TUN;
name = "tun%d";
} elseif (ifr->ifr_flags & IFF_TAP) { /* TAP device */
flags |= IFF_TAP;
name = "tap%d";
} else return -EINVAL;
if (*ifr->ifr_name)
name = ifr->ifr_name;
dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
NET_NAME_UNKNOWN, tun_setup, queues,
queues);
err = register_netdevice(tun->dev); if (err < 0) {
free_netdev(dev); return err;
} /* free_netdev() won't check refcnt, to avoid race * with dev_put() we need publish tun after registration.
*/
rcu_assign_pointer(tfile->tun, tun);
}
if (ifr->ifr_flags & IFF_NO_CARRIER)
netif_carrier_off(tun->dev); else
netif_carrier_on(tun->dev);
/* Make sure persistent devices do not get stuck in * xoff state.
*/ if (netif_running(tun->dev))
netif_tx_wake_all_queues(tun->dev);
/* This is like a cut-down ethtool ops, except done via tun fd so no
* privs required. */ staticint set_offload(struct tun_struct *tun, unsignedlong arg)
{
netdev_features_t features = 0;
if (arg & TUN_F_CSUM) {
features |= NETIF_F_HW_CSUM;
arg &= ~TUN_F_CSUM;
if (arg & (TUN_F_TSO4|TUN_F_TSO6)) { if (arg & TUN_F_TSO_ECN) {
features |= NETIF_F_TSO_ECN;
arg &= ~TUN_F_TSO_ECN;
} if (arg & TUN_F_TSO4)
features |= NETIF_F_TSO; if (arg & TUN_F_TSO6)
features |= NETIF_F_TSO6;
arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
}
arg &= ~TUN_F_UFO;
/* TODO: for now USO4 and USO6 should work simultaneously */ if (arg & TUN_F_USO4 && arg & TUN_F_USO6) {
features |= NETIF_F_GSO_UDP_L4;
arg &= ~(TUN_F_USO4 | TUN_F_USO6);
}
/* * Tunnel offload is allowed only if some plain offload is * available, too.
*/ if (features & PLAIN_GSO && arg & TUN_F_UDP_TUNNEL_GSO) {
features |= NETIF_F_GSO_UDP_TUNNEL; if (arg & TUN_F_UDP_TUNNEL_GSO_CSUM)
features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
arg &= ~(TUN_F_UDP_TUNNEL_GSO |
TUN_F_UDP_TUNNEL_GSO_CSUM);
}
}
/* This gives the user a way to test for new features in future by
* trying to set them. */ if (arg) return -EINVAL;
if (copy_from_user(&fd, data, sizeof(fd))) return -EFAULT;
if (fd == -1) {
prog = NULL;
} else {
prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(prog)) return PTR_ERR(prog);
}
return __tun_set_ebpf(tun, prog_p, prog);
}
/* Return correct value for tun->dev->addr_len based on tun->dev->type. */ staticunsignedchar tun_get_addr_len(unsignedshort type)
{ switch (type) { case ARPHRD_IP6GRE: case ARPHRD_TUNNEL6: returnsizeof(struct in6_addr); case ARPHRD_IPGRE: case ARPHRD_TUNNEL: case ARPHRD_SIT: return 4; case ARPHRD_ETHER: return ETH_ALEN; case ARPHRD_IEEE802154: case ARPHRD_IEEE802154_MONITOR: return IEEE802154_EXTENDED_ADDR_LEN; case ARPHRD_PHONET_PIPE: case ARPHRD_PPP: case ARPHRD_NONE: return 0; case ARPHRD_6LOWPAN: return EUI64_ADDR_LEN; case ARPHRD_FDDI: return FDDI_K_ALEN; case ARPHRD_HIPPI: return HIPPI_ALEN; case ARPHRD_IEEE802: return FC_ALEN; case ARPHRD_ROSE: return ROSE_ADDR_LEN; case ARPHRD_NETROM: return AX25_ADDR_LEN; case ARPHRD_LOCALTLK: return LTALK_ALEN; default: return 0;
}
}
staticlong __tun_chr_ioctl(struct file *file, unsignedint cmd, unsignedlong arg, int ifreq_len)
{ struct tun_file *tfile = file->private_data; struct net *net = sock_net(&tfile->sk); struct tun_struct *tun; void __user* argp = (void __user*)arg; unsignedint carrier; struct ifreq ifr;
kuid_t owner;
kgid_t group; int ifindex; int sndbuf; int ret; bool do_notify = false;
if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
(_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT;
} else {
memset(&ifr, 0, sizeof(ifr));
} if (cmd == TUNGETFEATURES) { /* Currently this just means: "what IFF flags are valid?". * This is needed because we never checked for invalid flags on * TUNSETIFF.
*/ return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER |
TUN_FEATURES, (unsignedint __user*)argp);
} elseif (cmd == TUNSETQUEUE) { return tun_set_queue(file, &ifr);
} elseif (cmd == SIOCGSKNS) { if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; return open_related_ns(&net->ns, get_net_ns);
}
rtnl_lock();
tun = tun_get(tfile); if (cmd == TUNSETIFF) {
ret = -EEXIST; if (tun) goto unlock;
ifr.ifr_name[IFNAMSIZ-1] = '\0';
ret = tun_set_iff(net, file, &ifr);
if (ret) goto unlock;
if (copy_to_user(argp, &ifr, ifreq_len))
ret = -EFAULT; goto unlock;
} if (cmd == TUNSETIFINDEX) {
ret = -EPERM; if (tun) goto unlock;
ret = -EFAULT; if (copy_from_user(&ifindex, argp, sizeof(ifindex))) goto unlock;
ret = -EINVAL; if (ifindex < 0) goto unlock;
ret = 0;
tfile->ifindex = ifindex; goto unlock;
}
case TUNSETOWNER: /* Set owner of the device */
owner = make_kuid(current_user_ns(), arg); if (!uid_valid(owner)) {
ret = -EINVAL; break;
}
tun->owner = owner;
do_notify = true;
netif_info(tun, drv, tun->dev, "owner set to %u\n",
from_kuid(&init_user_ns, tun->owner)); break;
case TUNSETGROUP: /* Set group of the device */
group = make_kgid(current_user_ns(), arg); if (!gid_valid(group)) {
ret = -EINVAL; break;
}
tun->group = group;
do_notify = true;
netif_info(tun, drv, tun->dev, "group set to %u\n",
from_kgid(&init_user_ns, tun->group)); break;
case TUNSETLINK: /* Only allow setting the type when the interface is down */ if (tun->dev->flags & IFF_UP) {
netif_info(tun, drv, tun->dev, "Linktype set failed because interface is up\n");
ret = -EBUSY;
} else {
ret = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
tun->dev);
ret = notifier_to_errno(ret); if (ret) {
netif_info(tun, drv, tun->dev, "Refused to change device type\n"); break;
}
tun->dev->type = (int) arg;
tun->dev->addr_len = tun_get_addr_len(tun->dev->type);
netif_info(tun, drv, tun->dev, "linktype set to %d\n",
tun->dev->type);
call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
tun->dev);
} break;
case TUNSETDEBUG:
tun->msg_enable = (u32)arg; break;
case TUNSETOFFLOAD:
ret = set_offload(tun, arg); break;
case TUNSETTXFILTER: /* Can be set only for TAPs */
ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break;
ret = update_filter(&tun->txflt, (void __user *)arg); break;
case SIOCGIFHWADDR: /* Get hw address */
netif_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); if (copy_to_user(argp, &ifr, ifreq_len))
ret = -EFAULT; break;
case SIOCSIFHWADDR: /* Set hw address */ if (tun->dev->addr_len > sizeof(ifr.ifr_hwaddr)) {
ret = -EINVAL; break;
}
ret = dev_set_mac_address_user(tun->dev,
(struct sockaddr_storage *)&ifr.ifr_hwaddr,
NULL); break;
case TUNGETSNDBUF:
sndbuf = tfile->socket.sk->sk_sndbuf; if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
ret = -EFAULT; break;
case TUNSETSNDBUF: if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
ret = -EFAULT; break;
} if (sndbuf <= 0) {
ret = -EINVAL; break;
}
tun->sndbuf = sndbuf;
tun_set_sndbuf(tun); break;
case TUNATTACHFILTER: /* Can be set only for TAPs */
ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break;
ret = -EFAULT; if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog))) break;
ret = tun_attach_filter(tun); break;
case TUNDETACHFILTER: /* Can be set only for TAPs */
ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break;
ret = 0;
tun_detach_filter(tun, tun->numqueues); break;
case TUNGETFILTER:
ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break;
ret = -EFAULT; if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog))) break;
ret = 0; break;
case TUNSETSTEERINGEBPF:
ret = tun_set_ebpf(tun, &tun->steering_prog, argp); break;
case TUNSETFILTEREBPF:
ret = tun_set_ebpf(tun, &tun->filter_prog, argp); break;
case TUNSETCARRIER:
ret = -EFAULT; if (copy_from_user(&carrier, argp, sizeof(carrier))) goto unlock;
ret = tun_net_change_carrier(tun->dev, (bool)carrier); break;
case TUNGETDEVNETNS:
ret = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto unlock;
ret = open_related_ns(&net->ns, get_net_ns); break;
default:
ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp); break;
}
if (do_notify)
netdev_state_change(tun->dev);
unlock:
rtnl_unlock(); if (tun)
tun_put(tun); return ret;
}
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: converts @arg and forwards to
 * __tun_chr_ioctl() with the size of struct compat_ifreq.
 *
 * For the commands listed in the switch, @arg is converted with
 * compat_ptr(); for every other command it is truncated to
 * compat_ulong_t.
 *
 * Returns whatever __tun_chr_ioctl() returns.
 */
static long tun_chr_compat_ioctl(struct file *file,
				 unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case TUNSETIFF:
	case TUNGETIFF:
	case TUNSETTXFILTER:
	case TUNGETSNDBUF:
	case TUNSETSNDBUF:
	case SIOCGIFHWADDR:
	case SIOCSIFHWADDR:
		arg = (unsigned long)compat_ptr(arg);
		break;
	default:
		arg = (compat_ulong_t)arg;
		break;
	}

	/*
	 * compat_ifreq is shorter than ifreq, so we must not access beyond
	 * the end of that structure. All fields that are used in this
	 * driver are compatible though, we don't need to convert the
	 * contents.
	 */
	return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq));
}
#endif /* CONFIG_COMPAT */
staticint tun_chr_fasync(int fd, struct file *file, int on)
{ struct tun_file *tfile = file->private_data; int ret;
if (on) {
ret = file_f_owner_allocate(file); if (ret) goto out;
}
if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0) goto out;
if (dev->rtnl_link_ops != &tun_link_ops) return NOTIFY_DONE;
switch (event) { case NETDEV_CHANGE_TX_QUEUE_LEN: if (tun_queue_resize(tun)) return NOTIFY_BAD; break; case NETDEV_UP: for (i = 0; i < tun->numqueues; i++) { struct tun_file *tfile;
/*
 * Look up the socket object embedded in a tun file.
 *
 * The returned object works like a packet socket and can be used for
 * sock_sendmsg/sock_recvmsg. The caller is responsible for holding a
 * reference to the file for as long as the socket is in use.
 *
 * Returns ERR_PTR(-EINVAL) when @file does not use tun_fops,
 * ERR_PTR(-EBADFD) when the file carries no private data, and otherwise
 * a pointer to the socket.
 */
struct socket *tun_get_socket(struct file *file)
{
	struct tun_file *queue;

	/* Reject files that were not opened through this driver. */
	if (file->f_op != &tun_fops)
		return ERR_PTR(-EINVAL);

	queue = file->private_data;
	if (queue)
		return &queue->socket;

	return ERR_PTR(-EBADFD);
}
EXPORT_SYMBOL_GPL(tun_get_socket);
/*
 * The information on this web page has been carefully compiled to the best
 * of our knowledge. However, no guarantee is given for the completeness,
 * correctness or quality of the information provided.
 *
 * Note:
 * The colored syntax highlighting and the measurement are still experimental.
 */