// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017 - 2019 Cambridge Greys Limited
 * Copyright (C) 2011 - 2014 Cisco Systems Inc
 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
 * James Leu (jleu@mindspring.net).
 * Copyright (C) 2001 by various other people who didn't put their name here.
 */
/*
 * Adapted from network devices with the following major changes:
 * All transports are static - simplifies the code significantly
 * Multiple FDs/IRQs per device
 * Vector IO optionally used for read/write, falling back to legacy
 * based on configuration and/or availability
 * Configuration is no longer positional - L2TPv3 and GRE require up to
 * 10 parameters, passing this as positional is not fit for purpose.
 * Only socket transports are supported
 */
/* A mini-buffer for packet drop read
 * All of our supported transports are datagram oriented and we always
 * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
 * than the packet size it still counts as full packet read and will
 * clean the incoming stream to keep sigio/epoll happy
 */
#define DROP_BUFFER_SIZE 32

/* Scratch destination for reads whose skb allocation failed (see above). */
static char *drop_buffer;
/*
 * Advance the mmsg queue head by n = advance. Resets the queue to
 * maximum enqueue/dequeue-at-once capacity if possible. Called by
 * dequeuers. Caller must hold the head_lock!
 */
/*
 * Generic vector dequeue via sendmmsg with support for forming headers
 * using transport specific callback. Allows GRE, L2TPv3, RAW and
 * other transports to use a common dequeue procedure in vector mode.
 * Returns the remaining queue depth so the caller can decide whether
 * a further TX pass is needed.
 */

static int vector_send(struct vector_queue *qi)
{
	struct vector_private *vp = netdev_priv(qi->dev);
	struct mmsghdr *send_from;
	int result = 0, send_len;

	if (spin_trylock(&qi->head_lock)) {
		/* update queue_depth to current value */
		while (atomic_read(&qi->queue_depth) > 0) {
			/* Calculate the start of the vector */
			send_len = atomic_read(&qi->queue_depth);
			send_from = qi->mmsg_vector;
			send_from += qi->head;
			/* Adjust vector size if wraparound */
			if (send_len + qi->head > qi->max_depth)
				send_len = qi->max_depth - qi->head;
			/* Try to TX as many packets as possible */
			if (send_len > 0) {
				result = uml_vector_sendmmsg(
					vp->fds->tx_fd,
					send_from,
					send_len,
					0
				);
				vp->in_write_poll =
					(result != send_len);
			}
			/* For some of the sendmmsg error scenarios
			 * we may end being unsure in the TX success
			 * for all packets. It is safer to declare
			 * them all TX-ed and blame the network.
			 */
			if (result < 0) {
				if (net_ratelimit())
					netdev_err(vp->dev, "sendmmsg err=%i\n",
						result);
				vp->in_error = true;
				result = send_len;
			}
			if (result > 0) {
				consume_vector_skbs(qi, result);
				/* This is equivalent to an TX IRQ.
				 * Restart the upper layers to feed us
				 * more packets.
				 */
				if (result > vp->estats.tx_queue_max)
					vp->estats.tx_queue_max = result;
				vp->estats.tx_queue_running_average =
					(vp->estats.tx_queue_running_average + result) >> 1;
			}
			netif_wake_queue(qi->dev);
			/* if TX is busy, break out of the send loop,
			 * poll write IRQ will reschedule xmit for us.
			 */
			if (result != send_len) {
				vp->estats.tx_restart_queue++;
				break;
			}
		}
		spin_unlock(&qi->head_lock);
	}
	return atomic_read(&qi->queue_depth);
}
/* Queue destructor. Deliberately stateless so we can use * it in queue cleanup if initialization fails.
*/
if (qi == NULL) return; /* deallocate any skbuffs - we rely on any unused to be * set to NULL.
*/ if (qi->skbuff_vector != NULL) { for (i = 0; i < qi->max_depth; i++) { if (*(qi->skbuff_vector + i) != NULL)
dev_kfree_skb_any(*(qi->skbuff_vector + i));
}
kfree(qi->skbuff_vector);
} /* deallocate matching IOV structures including header buffs */ if (qi->mmsg_vector != NULL) {
mmsg_vector = qi->mmsg_vector; for (i = 0; i < qi->max_depth; i++) {
iov = mmsg_vector->msg_hdr.msg_iov; if (iov != NULL) { if ((vp->header_size > 0) &&
(iov->iov_base != NULL))
kfree(iov->iov_base);
kfree(iov);
}
mmsg_vector++;
}
kfree(qi->mmsg_vector);
}
kfree(qi);
}
/* * Queue constructor. Create a queue with a given side.
*/ staticstruct vector_queue *create_queue( struct vector_private *vp, int max_size, int header_size, int num_extra_frags)
{ struct vector_queue *result; int i; struct iovec *iov; struct mmsghdr *mmsg_vector;
/* further failures can be handled safely by destroy_queue*/
mmsg_vector = result->mmsg_vector; for (i = 0; i < max_size; i++) { /* Clear all pointers - we use non-NULL as marking on * what to free on destruction
*/
*(result->skbuff_vector + i) = NULL;
mmsg_vector->msg_hdr.msg_iov = NULL;
mmsg_vector++;
}
mmsg_vector = result->mmsg_vector;
result->max_iov_frags = num_extra_frags; for (i = 0; i < max_size; i++) { if (vp->header_size > 0)
iov = kmalloc_array(3 + num_extra_frags, sizeof(struct iovec),
GFP_KERNEL
); else
iov = kmalloc_array(2 + num_extra_frags, sizeof(struct iovec),
GFP_KERNEL
); if (iov == NULL) goto out_fail;
mmsg_vector->msg_hdr.msg_iov = iov;
mmsg_vector->msg_hdr.msg_iovlen = 1;
mmsg_vector->msg_hdr.msg_control = NULL;
mmsg_vector->msg_hdr.msg_controllen = 0;
mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
mmsg_vector->msg_hdr.msg_name = NULL;
mmsg_vector->msg_hdr.msg_namelen = 0; if (vp->header_size > 0) {
iov->iov_base = kmalloc(header_size, GFP_KERNEL); if (iov->iov_base == NULL) goto out_fail;
iov->iov_len = header_size;
mmsg_vector->msg_hdr.msg_iovlen = 2;
iov++;
}
iov->iov_base = NULL;
iov->iov_len = 0;
mmsg_vector++;
}
spin_lock_init(&result->head_lock);
spin_lock_init(&result->tail_lock);
atomic_set(&result->queue_depth, 0);
result->head = 0;
result->tail = 0; return result;
out_skb_fail:
kfree(result->mmsg_vector);
out_mmsg_fail:
kfree(result); return NULL;
out_fail:
destroy_queue(result); return NULL;
}
/*
 * We do not use the RX queue as a proper wraparound queue for now
 * This is not necessary because the consumption via napi_gro_receive()
 * happens in-line. While we can try using the return code of
 * netif_rx() for flow control there are no drivers doing this today.
 * For this RX specific use we ignore the tail/head locks and
 * just read into a prepared queue filled with skbuffs.
 */
staticstruct sk_buff *prep_skb( struct vector_private *vp, struct user_msghdr *msg)
{ int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN; struct sk_buff *result; int iov_index = 0, len; struct iovec *iov = msg->msg_iov; int err, nr_frags, frag;
skb_frag_t *skb_frag;
if (vp->req_size <= linear)
len = linear; else
len = vp->req_size;
result = alloc_skb_with_frags(
linear,
len - vp->max_packet,
3,
&err,
GFP_ATOMIC
); if (vp->header_size > 0)
iov_index++; if (result == NULL) {
iov[iov_index].iov_base = NULL;
iov[iov_index].iov_len = 0; goto done;
}
skb_reserve(result, vp->headroom);
result->dev = vp->dev;
skb_put(result, vp->max_packet);
result->data_len = len - vp->max_packet;
result->len += len - vp->max_packet;
skb_reset_mac_header(result);
result->ip_summed = CHECKSUM_NONE;
iov[iov_index].iov_base = result->data;
iov[iov_index].iov_len = vp->max_packet;
iov_index++;
/* RX is always emptied 100% during each cycle, so we do not * have to do the tail wraparound math for it.
*/
qi->head = qi->tail = 0;
for (i = 0; i < queue_depth; i++) { /* it is OK if allocation fails - recvmmsg with NULL data in * iov argument still performs an RX, just drops the packet * This allows us stop faffing around with a "drop buffer"
*/
/* Bog standard recv using recvmsg - not used normally unless the user * explicitly specifies not to use recvmmsg vector RX.
*/
staticint vector_legacy_rx(struct vector_private *vp)
{ int pkt_len; struct user_msghdr hdr; struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */ int iovpos = 0; struct sk_buff *skb; int header_check;
if (skb == NULL) { /* Read a packet into drop_buffer and don't do * anything with it.
*/
iov[iovpos].iov_base = drop_buffer;
iov[iovpos].iov_len = DROP_BUFFER_SIZE;
hdr.msg_iovlen = 1;
vp->dev->stats.rx_dropped++;
}
/* We treat packet processing as enqueue, buffer refresh as dequeue * The queue_depth tells us how many buffers have been used and how * many do we need to prep the next time prep_queue_for_rx() is called.
*/
atomic_add(packet_count, &qi->queue_depth);
for (i = 0; i < packet_count; i++) {
skb = (*skbuff_vector); if (mmsg_vector->msg_len > vp->header_size) { if (vp->header_size > 0) {
header_check = vp->verify_header(
mmsg_vector->msg_hdr.msg_iov->iov_base,
skb,
vp
); if (header_check < 0) { /* Overlay header failed to verify - discard. * We can actually keep this skb and reuse it, * but that will make the prep logic too * complex.
*/
dev_kfree_skb_irq(skb);
vp->estats.rx_encaps_errors++; continue;
} if (header_check > 0) {
vp->estats.rx_csum_offload_good++;
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
pskb_trim(skb,
mmsg_vector->msg_len - vp->rx_header_size);
skb->protocol = eth_type_trans(skb, skb->dev); /* * We do not need to lock on updating stats here * The interrupt loop is non-reentrant.
*/
vp->dev->stats.rx_bytes += skb->len;
vp->dev->stats.rx_packets++;
napi_gro_receive(&vp->napi, skb);
} else { /* Overlay header too short to do anything - discard. * We can actually keep this skb and reuse it, * but that will make the prep logic too complex.
*/ if (skb != NULL)
dev_kfree_skb_irq(skb);
}
(*skbuff_vector) = NULL; /* Move to the next buffer element */
mmsg_vector++;
skbuff_vector++;
} if (packet_count > 0) { if (vp->estats.rx_queue_max < packet_count)
vp->estats.rx_queue_max = packet_count;
vp->estats.rx_queue_running_average =
(vp->estats.rx_queue_running_average + packet_count) >> 1;
} return packet_count;
}
if (!netif_running(dev)) return IRQ_NONE; /* We need to pay attention to it only if we got * -EAGAIN or -ENOBUFFS from sendmmsg. Otherwise * we ignore it. In the future, it may be worth * it to improve the IRQ controller a bit to make * tweaking the IRQ mask less costly
*/
/* WRITE IRQ - we need it only if we have vector TX */ if ((vp->options & VECTOR_TX) > 0) {
err = um_request_irq(
irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
IRQ_WRITE, vector_tx_interrupt,
IRQF_SHARED, dev->name, dev); if (err < 0) {
netdev_err(dev, "vector_open: failed to get tx irq(%d)\n", err);
err = -ENETUNREACH; goto out_close;
}
vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
}
if ((vp->options & VECTOR_QDISC_BYPASS) != 0) { if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
vp->options |= VECTOR_BPF;
} if (((vp->options & VECTOR_BPF) != 0) && (vp->bpf == NULL))
vp->bpf = uml_vector_default_bpf(dev->dev_addr);
if (vp->bpf != NULL)
uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
netif_start_queue(dev);
vector_reset_stats(vp);
/* clear buffer - it can happen that the host side of the interface * is full when we get here. In this case, new data is never queued, * SIGIOs never arrive, and the net never works.
*/
/* ndo_set_features hook - resize RX buffers when GRO is toggled. */
static int vector_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct vector_private *vp = netdev_priv(dev);

	/* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
	 * no way to negotiate it on raw sockets, so we can change
	 * only our side.
	 */
	if (features & NETIF_F_GRO)
		/* All new frame buffers will be GRO-sized */
		vp->req_size = 65536;
	else
		/* All new frame buffers will be normal sized */
		vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
	return 0;
}
/* Stats are modified in the dequeue portions of * rx/tx which are protected by the head locks * grabbing these locks here ensures they are up * to date.
*/
if (!mac_pton(str, addr)) {
netdev_err(dev, "Failed to parse '%s' as an ethernet address\n", str); goto random;
} if (is_multicast_ether_addr(addr)) {
netdev_err(dev, "Attempt to assign a multicast ethernet address to a device disallowed\n"); goto random;
} if (!is_valid_ether_addr(addr)) {
netdev_err(dev, "Attempt to assign an invalid ethernet address to a device disallowed\n"); goto random;
} if (!is_local_ether_addr(addr)) {
netdev_warn(dev, "Warning: Assigning a globally valid ethernet address to a device\n");
netdev_warn(dev, "You should set the 2nd rightmost bit in the first byte of the MAC,\n");
netdev_warn(dev, "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], addr[5]);
}
eth_hw_addr_set(dev, addr); return;
random:
netdev_info(dev, "Choosing a random ethernet address\n");
eth_hw_addr_random(dev);
}
staticvoid vector_eth_configure( int n, struct arglist *def
)
{ struct vector_device *device; struct net_device *dev; struct vector_private *vp; int err;
device = kzalloc(sizeof(*device), GFP_KERNEL); if (device == NULL) {
pr_err("Failed to allocate struct vector_device for vec%d\n", n); return;
}
dev = alloc_etherdev(sizeof(struct vector_private)); if (dev == NULL) {
pr_err("Failed to allocate struct net_device for vec%d\n", n); goto out_free_device;
}
dev->mtu = get_mtu(def);
INIT_LIST_HEAD(&device->list);
device->unit = n;
/* If this name ends up conflicting with an existing registered * netdevice, that is OK, register_netdev{,ice}() will notice this * and fail.
*/
snprintf(dev->name, sizeof(dev->name), "vec%d", n);
vector_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
vp = netdev_priv(dev);
INIT_LIST_HEAD(&vp->list);
vp->dev = dev;
vp->unit = n;
vp->options = get_transport_options(def);
vp->parsed = def;
vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER; /* * TODO - we need to calculate headroom so that ip header * is 16 byte aligned all the time
*/
vp->headroom = get_headroom(def);
vp->coalesce = 2;
vp->req_size = get_req_size(def);
/*
 * NOTE(review): the text below is extraction residue (a German web-site
 * disclaimer) that was accidentally appended to this source file and is
 * not part of the driver - it should be removed entirely.
 * "Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfaeltig zusammengestellt. Es wird jedoch weder Vollstaendigkeit,
 * noch Richtigkeit, noch Qualitaet der bereit gestellten Informationen
 * zugesichert. Bemerkung: Die farbliche Syntaxdarstellung und die
 * Messung sind noch experimentell."
 */