/* * Back-end of the driver for virtual network devices. This portion of the * driver exports a 'unified' network-device interface that can be accessed * by any operating system that implements a compatible front end. A * reference front-end implementation can be found in: * drivers/net/xen-netfront.c * * Copyright (c) 2002-2005, K A Fraser * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE.
*/
/* Provide an option to disable split event channels at load time as
 * event channels are limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = true;
module_param(separate_tx_rx_irq, bool, 0644);

/* The time that packets can stay on the guest Rx internal queue
 * before they are dropped.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);

/* The length of time before the frontend is considered unresponsive
 * because it isn't providing Rx slots.
 */
unsigned int rx_stall_timeout_msecs = 60000;
module_param(rx_stall_timeout_msecs, uint, 0444);

#define MAX_QUEUES_DEFAULT 8
unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
		 "Maximum number of queues per virtual interface");

/*
 * This is the maximum slots a skb can have. If a guest sends a skb
 * which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);

/* The amount to copy out of the first guest Tx slot into the skb's
 * linear area. If the first slot has more data, it will be mapped
 * and put into the first frag.
 *
 * This is sized to avoid pulling headers from the frags for most
 * TCP/IP packets.
 */
#define XEN_NETBACK_TX_COPY_LEN 128

/* This is the maximum number of flows in the hash cache. */
#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");

/* The module parameter tells that we have to put data
 * for xen-netfront with the XDP_PACKET_HEADROOM offset
 * needed for XDP processing
 */
bool provides_xdp_headroom = true;
module_param(provides_xdp_headroom, bool, 0644);
/* * Allow a burst big enough to transmit a jumbo packet of up to 128kB. * Otherwise the interface can seize up due to insufficient credit.
*/
max_burst = max(131072UL, queue->credit_bytes);
/* Take care that adding a new chunk of credit doesn't wrap to zero. */
max_credit = queue->remaining_credit + queue->credit_bytes; if (max_credit < queue->remaining_credit)
max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
do {
make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); if (cons == end) break;
RING_COPY_REQUEST(&queue->tx, cons++, txp);
extra_count = 0; /* only the first frag can have extras */
} while (1);
queue->tx.req_cons = cons;
}
/* Mark the whole interface as broken after a fatal Tx protocol
 * violation by the frontend.  The actual teardown happens in queue 0's
 * kthread, so kick it to act on the disabled flag.
 */
static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	vif->disabled = true;
	/* Disable the vif from queue 0's kthread */
	if (vif->num_queues)
		xenvif_kick_thread(&vif->queues[0]);
}
staticint xenvif_count_requests(struct xenvif_queue *queue, struct xen_netif_tx_request *first, unsignedint extra_count, struct xen_netif_tx_request *txp, int work_to_do)
{
RING_IDX cons = queue->tx.req_cons; int slots = 0; int drop_err = 0; int more_data;
if (!(first->flags & XEN_NETTXF_more_data)) return 0;
do { struct xen_netif_tx_request dropped_tx = { 0 };
if (slots >= work_to_do) {
netdev_err(queue->vif->dev, "Asked for %d slots but exceeds this limit\n",
work_to_do);
xenvif_fatal_tx_err(queue->vif); return -ENODATA;
}
/* This guest is really using too many slots and * considered malicious.
*/ if (unlikely(slots >= fatal_skb_slots)) {
netdev_err(queue->vif->dev, "Malicious frontend using %d slots, threshold %u\n",
slots, fatal_skb_slots);
xenvif_fatal_tx_err(queue->vif); return -E2BIG;
}
/* Xen network protocol had implicit dependency on * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to * the historical MAX_SKB_FRAGS value 18 to honor the * same behavior as before. Any packet using more than * 18 slots but less than fatal_skb_slots slots is * dropped
*/ if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { if (net_ratelimit())
netdev_dbg(queue->vif->dev, "Too many slots (%d) exceeding limit (%d), dropping packet\n",
slots, XEN_NETBK_LEGACY_SLOTS_MAX);
drop_err = -E2BIG;
}
if (drop_err)
txp = &dropped_tx;
RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
/* If the guest submitted a frame >= 64 KiB then * first->size overflowed and following slots will * appear to be larger than the frame. * * This cannot be fatal error as there are buggy * frontends that do this. * * Consume all slots and drop the packet.
*/ if (!drop_err && txp->size > first->size) { if (net_ratelimit())
netdev_dbg(queue->vif->dev, "Invalid tx request, slot size %u > remaining size %u\n",
txp->size, first->size);
drop_err = -EIO;
}
if (txp == first)
txp = txfrags; else
txp++;
queue->pending_cons++;
nr_slots--;
} else { /* The copy op partially covered the tx_request. * The remainder will be mapped or copied in the next * iteration.
*/
txp->offset += amount;
txp->size -= amount;
}
}
if (nskb) { /* A frag_list skb was allocated but it is no longer needed * because enough slots were converted to copy ops above or some * were empty.
*/
kfree_skb(nskb);
}
staticint xenvif_tx_check_gop(struct xenvif_queue *queue, struct sk_buff *skb, struct gnttab_map_grant_ref **gopp_map, struct gnttab_copy **gopp_copy)
{ struct gnttab_map_grant_ref *gop_map = *gopp_map;
u16 pending_idx; /* This always points to the shinfo of the skb being checked, which * could be either the first or the one on the frag_list
*/ struct skb_shared_info *shinfo = skb_shinfo(skb); /* If this is non-NULL, we are currently checking the frag_list skb, and * this points to the shinfo of the first one
*/ struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; constbool sharedslot = nr_frags &&
frag_get_pending_idx(&shinfo->frags[0]) ==
copy_pending_idx(skb, copy_count(skb) - 1); int i, err = 0;
for (i = 0; i < copy_count(skb); i++) { int newerr;
/* Check status of header. */
pending_idx = copy_pending_idx(skb, i);
newerr = (*gopp_copy)->status;
/* Split copies need to be handled together. */ if (XENVIF_TX_CB(skb)->split_mask & (1U << i)) {
(*gopp_copy)++; if (!newerr)
newerr = (*gopp_copy)->status;
} if (likely(!newerr)) { /* The first frag might still have this slot mapped */ if (i < copy_count(skb) - 1 || !sharedslot)
xenvif_idx_release(queue, pending_idx,
XEN_NETIF_RSP_OKAY);
} else {
err = newerr; if (net_ratelimit())
netdev_dbg(queue->vif->dev, "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
(*gopp_copy)->status,
pending_idx,
(*gopp_copy)->source.u.ref); /* The first frag might still have this slot mapped */ if (i < copy_count(skb) - 1 || !sharedslot)
xenvif_idx_release(queue, pending_idx,
XEN_NETIF_RSP_ERROR);
}
(*gopp_copy)++;
}
check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { int j, newerr;
/* Check error status: if okay then remember grant handle. */
newerr = gop_map->status;
if (likely(!newerr)) {
xenvif_grant_handle_set(queue,
pending_idx,
gop_map->handle); /* Had a previous error? Invalidate this fragment. */ if (unlikely(err)) {
xenvif_idx_unmap(queue, pending_idx); /* If the mapping of the first frag was OK, but * the header's copy failed, and they are * sharing a slot, send an error
*/ if (i == 0 && !first_shinfo && sharedslot)
xenvif_idx_release(queue, pending_idx,
XEN_NETIF_RSP_ERROR); else
xenvif_idx_release(queue, pending_idx,
XEN_NETIF_RSP_OKAY);
} continue;
}
/* Error on this fragment: respond to client with an error. */ if (net_ratelimit())
netdev_dbg(queue->vif->dev, "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
i,
gop_map->status,
pending_idx,
gop_map->ref);
/* Not the first error? Preceding frags already invalidated. */ if (err) continue;
/* Invalidate preceding fragments of this skb. */ for (j = 0; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
xenvif_idx_unmap(queue, pending_idx);
xenvif_idx_release(queue, pending_idx,
XEN_NETIF_RSP_OKAY);
}
/* And if we found the error while checking the frag_list, unmap * the first skb's frags
*/ if (first_shinfo) { for (j = 0; j < first_shinfo->nr_frags; j++) {
pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
xenvif_idx_unmap(queue, pending_idx);
xenvif_idx_release(queue, pending_idx,
XEN_NETIF_RSP_OKAY);
}
}
/* Remember the error: invalidate all subsequent fragments. */
err = newerr;
}
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = shinfo->frags + i; struct xen_netif_tx_request *txp; struct page *page;
u16 pending_idx;
pending_idx = frag_get_pending_idx(frag);
/* If this is not the first frag, chain it to the previous*/ if (prev_pending_idx == INVALID_PENDING_IDX)
skb_shinfo(skb)->destructor_arg =
&callback_param(queue, pending_idx); else
callback_param(queue, prev_pending_idx).ctx =
&callback_param(queue, pending_idx);
/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy * peers can fail to set NETRXF_csum_blank when sending a GSO * frame. In this case force the SKB to CHECKSUM_PARTIAL and * recalculate the partial checksum.
*/ if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
queue->stats.rx_gso_checksum_fixup++;
skb->ip_summed = CHECKSUM_PARTIAL;
recalculate_partial_csum = true;
}
/* A non-CHECKSUM_PARTIAL SKB does not require setup. */ if (skb->ip_summed != CHECKSUM_PARTIAL) return 0;
/* Timer could already be pending in rare cases. */ if (timer_pending(&queue->credit_timeout)) {
queue->rate_limited = true; returntrue;
}
/* Passed the point where we can replenish credit? */ if (time_after_eq64(now, next_credit)) {
queue->credit_window_start = now;
tx_add_credit(queue);
}
/* Still too big to send right now? Set a callback. */ if (size > queue->remaining_credit) {
mod_timer(&queue->credit_timeout,
next_credit);
queue->credit_window_start = next_credit;
queue->rate_limited = true;
returntrue;
}
returnfalse;
}
/* No locking is required in xenvif_mcast_add/del() as they are * only ever invoked from NAPI poll. An RCU list is used because * xenvif_mcast_match() is called asynchronously, during start_xmit.
*/
void xenvif_mcast_addr_list_free(struct xenvif *vif)
{ /* No need for locking or RCU here. NAPI poll and TX queue * are stopped.
*/ while (!list_empty(&vif->fe_mcast_addr)) { struct xenvif_mcast_addr *mcast;
skb = xenvif_alloc_skb(data_len); if (unlikely(skb == NULL)) {
netdev_dbg(queue->vif->dev, "Can't allocate a skb in start_xmit.\n");
xenvif_tx_err(queue, &txreq, extra_count, idx); break;
}
skb_shinfo(skb)->nr_frags = ret; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
*/
frag_overflow = 0;
nskb = NULL; if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
BUG_ON(frag_overflow > MAX_SKB_FRAGS);
skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
nskb = xenvif_alloc_skb(0); if (unlikely(nskb == NULL)) {
skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
xenvif_tx_err(queue, &txreq, extra_count, idx); if (net_ratelimit())
netdev_err(queue->vif->dev, "Can't allocate the frag_list skb.\n"); break;
}
}
/* Consolidate skb with a frag_list into a brand new one with local pages on * frags. Returns 0 or -ENOMEM if can't allocate new pages.
*/ staticint xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
{ unsignedint offset = skb_headlen(skb);
skb_frag_t frags[MAX_SKB_FRAGS]; int i, f; struct ubuf_info *uarg; struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
/* Release all the original (foreign) frags. */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
skb_frag_unref(skb, f);
uarg = skb_shinfo(skb)->destructor_arg; /* increase inflight counter to offset decrement in callback */
atomic_inc(&queue->inflight_packets);
uarg->ops->complete(NULL, uarg, true);
skb_shinfo(skb)->destructor_arg = NULL;
/* Fill the skb with the new (local) frags. */
memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
skb_shinfo(skb)->nr_frags = i;
skb->truesize += i * PAGE_SIZE;
/* Check the remap error code. */ if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) { /* If there was an error, xenvif_tx_check_gop is * expected to release all the frags which were mapped, * so kfree_skb shouldn't do it again
*/
skb_shinfo(skb)->nr_frags = 0; if (skb_has_frag_list(skb)) { struct sk_buff *nskb =
skb_shinfo(skb)->frag_list;
skb_shinfo(nskb)->nr_frags = 0;
}
kfree_skb(skb); continue;
}
if (unlikely(skb_has_frag_list(skb))) { struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
xenvif_skb_zerocopy_prepare(queue, nskb); if (xenvif_handle_frag_list(queue, skb)) { if (net_ratelimit())
netdev_err(queue->vif->dev, "Not enough memory to consolidate frag_list!\n");
xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb); continue;
} /* Copied all the bits from the frag list -- free it. */
skb_frag_list_init(skb);
kfree_skb(nskb);
}
if (checksum_setup(queue, skb)) {
netdev_dbg(queue->vif->dev, "Can't setup checksum in net_tx_action\n"); /* We have to set this flag to trigger the callback */ if (skb_shinfo(skb)->destructor_arg)
xenvif_skb_zerocopy_prepare(queue, skb);
kfree_skb(skb); continue;
}
skb_probe_transport_header(skb);
/* If the packet is GSO then we will have just set up the * transport header offset in checksum_setup so it's now * straightforward to calculate gso_segs.
*/ if (skb_is_gso(skb)) { int mss, hdrlen;
/* GSO implies having the L4 header. */
WARN_ON_ONCE(!skb_transport_header_was_set(skb)); if (unlikely(!skb_transport_header_was_set(skb))) {
kfree_skb(skb); continue;
}
/* Set this flag right before netif_receive_skb, otherwise * someone might think this packet already left netback, and * do a skb_copy_ubufs while we are still in control of the * skb. E.g. the __pskb_pull_tail earlier can do such thing.
*/ if (skb_shinfo(skb)->destructor_arg) {
xenvif_skb_zerocopy_prepare(queue, skb);
queue->stats.tx_zerocopy_sent++;
}
/* This is the only place where we grab this lock, to protect callbacks * from each other.
*/
spin_lock_irqsave(&queue->callback_lock, flags); do {
u16 pending_idx = ubuf->desc;
ubuf = (struct ubuf_info_msgzc *) ubuf->ctx;
BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
MAX_PENDING_REQS);
index = pending_index(queue->dealloc_prod);
queue->dealloc_ring[index] = pending_idx; /* Sync with xenvif_tx_dealloc_action: * insert idx then incr producer.
*/
smp_wmb();
queue->dealloc_prod++;
} while (ubuf);
spin_unlock_irqrestore(&queue->callback_lock, flags);
if (likely(zerocopy_success))
queue->stats.tx_zerocopy_success++; else
queue->stats.tx_zerocopy_fail++;
xenvif_skb_zerocopy_complete(queue);
}
if (gop - queue->tx_unmap_ops > 0) { int ret;
ret = gnttab_unmap_refs(queue->tx_unmap_ops,
NULL,
queue->pages_to_unmap,
gop - queue->tx_unmap_ops); if (ret) {
netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n",
gop - queue->tx_unmap_ops, ret); for (i = 0; i < gop - queue->tx_unmap_ops; ++i) { if (gop[i].status != GNTST_okay)
netdev_err(queue->vif->dev, " host_addr: 0x%llx handle: 0x%x status: %d\n",
gop[i].host_addr,
gop[i].handle,
gop[i].status);
}
BUG();
}
}
for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
xenvif_idx_release(queue, pending_idx_release[i],
XEN_NETIF_RSP_OKAY);
}
/* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{ unsigned nr_mops = 0, nr_cops = 0; int work_done, ret;
gnttab_batch_copy(queue->tx_copy_ops, nr_cops); if (nr_mops != 0) {
ret = gnttab_map_refs(queue->tx_map_ops,
NULL,
queue->pages_to_map,
nr_mops); if (ret) { unsignedint i;
netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n",
nr_mops, ret); for (i = 0; i < nr_mops; ++i)
WARN_ON_ONCE(queue->tx_map_ops[i].status ==
GNTST_okay);
}
}
/* Release the pending index before pusing the Tx response so * its available before a new Tx request is pushed by the * frontend.
*/
index = pending_index(queue->pending_prod++);
queue->pending_ring[index] = pending_idx;
staticbool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
{ /* Dealloc thread must remain running until all inflight * packets complete.
*/ return kthread_should_stop() &&
!atomic_read(&queue->inflight_packets);
}
int xenvif_dealloc_kthread(void *data)
{ struct xenvif_queue *queue = data;
for (;;) {
wait_event_interruptible(queue->dealloc_wq,
tx_dealloc_work_todo(queue) ||
xenvif_dealloc_kthread_should_stop(queue)); if (xenvif_dealloc_kthread_should_stop(queue)) break;
while (xenvif_ctrl_work_todo(vif)) {
xenvif_ctrl_action(vif);
eoi_flag = 0;
}
xen_irq_lateeoi(irq, eoi_flag);
return IRQ_HANDLED;
}
staticint __init netback_init(void)
{ int rc = 0;
if (!xen_domain()) return -ENODEV;
/* Allow as many queues as there are CPUs but max. 8 if user has not * specified a value.
*/ if (xenvif_max_queues == 0)
xenvif_max_queues = min_t(unsignedint, MAX_QUEUES_DEFAULT,
num_online_cpus());
if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
}
rc = xenvif_xenbus_init(); if (rc) goto failed_init;
/*
 * NOTE(review): the following text is extraneous web-page boilerplate that
 * was scraped into this file and is not C code; it should be removed.
 * English translation of the original German:
 * "The information on this website has been carefully compiled to the best
 * of our knowledge. However, neither completeness, nor correctness, nor
 * quality of the provided information is guaranteed.
 * Note: the colour syntax highlighting and the measurement are still
 * experimental."
 */