/* A packet could be split to fit the RX buffer, so we can retrieve * the payload length from the header and the buffer pointer taking * care of the offset in the original packet.
*/
pkt_hdr = virtio_vsock_hdr(pkt);
payload_len = pkt->len;
/* pkt->hdr is little-endian so no need to byteswap here */
hdr->src_cid = pkt_hdr->src_cid;
hdr->src_port = pkt_hdr->src_port;
hdr->dst_cid = pkt_hdr->dst_cid;
hdr->dst_port = pkt_hdr->dst_port;
/* If 'vsk' != NULL then payload is always present, so we * will never call '__zerocopy_sg_from_iter()' below without * setting skb owner in 'skb_set_owner_w()'. The only case * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message * without payload.
*/
WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy);
/* Set owner here, because '__zerocopy_sg_from_iter()' uses * owner of skb without check to update 'sk_wmem_alloc'.
*/ if (vsk)
skb_set_owner_w(skb, sk_vsock(vsk));
if (info->msg && payload_len > 0) { int err;
err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); if (err) goto out;
/* This function can only be used on connecting/connected sockets, * since a socket assigned to a transport is required. * * Do not use on listener sockets!
*/ staticint virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt_info *info)
{
u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
u32 src_cid, src_port, dst_cid, dst_port; conststruct virtio_transport *t_ops; struct virtio_vsock_sock *vvs;
u32 pkt_len = info->pkt_len; bool can_zcopy = false;
u32 rest_len; int ret;
/* virtio_transport_get_credit might return less than pkt_len credit */
pkt_len = virtio_transport_get_credit(vvs, pkt_len);
/* Do not send zero length OP_RW pkt */ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) return pkt_len;
if (info->msg) { /* If zerocopy is not enabled by 'setsockopt()', we behave as * there is no MSG_ZEROCOPY flag set.
*/ if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
info->msg->msg_flags &= ~MSG_ZEROCOPY;
if (info->msg->msg_flags & MSG_ZEROCOPY)
can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
if (can_zcopy)
max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
(MAX_SKB_FRAGS * PAGE_SIZE));
}
rest_len = pkt_len;
do { struct sk_buff *skb;
size_t skb_len;
skb_len = min(max_skb_len, rest_len);
skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
src_cid, src_port,
dst_cid, dst_port); if (!skb) {
ret = -ENOMEM; break;
}
/* We process buffer part by part, allocating skb on * each iteration. If this is last skb for this buffer * and MSG_ZEROCOPY mode is in use - we must allocate * completion for the current syscall.
*/ if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { if (virtio_transport_init_zcopy_skb(vsk, skb,
info->msg,
can_zcopy)) {
kfree_skb(skb);
ret = -ENOMEM; break;
}
}
virtio_transport_inc_tx_pkt(vvs, skb);
ret = t_ops->send_pkt(skb); if (ret < 0) break;
/* Both virtio and vhost 'send_pkt()' returns 'skb_len', * but for reliability use 'ret' instead of 'skb_len'. * Also if partial send happens (e.g. 'ret' != 'skb_len') * somehow, we break this loop, but account such returned * value in 'virtio_transport_put_credit()'.
*/
rest_len -= ret;
if (WARN_ONCE(ret != skb_len, "'send_pkt()' returns %i, but %zu expected\n",
ret, skb_len)) break;
} while (rest_len);
virtio_transport_put_credit(vvs, rest_len);
/* Return number of bytes, if any data has been sent. */ if (rest_len != pkt_len)
ret = pkt_len - rest_len;
return ret;
}
staticbool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
u32 len)
{ if (vvs->buf_used + len > vvs->buf_alloc) returnfalse;
bytes = len - total; if (bytes > skb->len)
bytes = skb->len;
spin_unlock_bh(&vvs->rx_lock);
/* sk_lock is held by caller so no one else can dequeue. * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
&msg->msg_iter, bytes); if (err) goto out;
if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes, "rx_queue is empty, but rx_bytes is non-zero\n")) {
spin_unlock_bh(&vvs->rx_lock); return err;
}
while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
size_t bytes, dequeued = 0;
skb = skb_peek(&vvs->rx_queue);
bytes = min_t(size_t, len - total,
skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset);
/* sk_lock is held by caller so no one else can dequeue. * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
err = skb_copy_datagram_iter(skb,
VIRTIO_VSOCK_SKB_CB(skb)->offset,
&msg->msg_iter, bytes); if (err) goto out;
/* To reduce the number of credit update messages, * don't update credits as long as lots of space is available. * Note: the limit chosen here is arbitrary. Setting the limit * too high causes extra messages. Too low causes transmitter * stalls. As stalls are in theory more expensive than extra * messages, we set the limit to a high value. TODO: experiment * with different values. Also send credit update message when * number of bytes in rx queue is not enough to wake up reader.
*/ if (fwd_cnt_delta &&
(free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes))
virtio_transport_send_credit_update(vsk);
bytes = len - total; if (bytes > skb->len)
bytes = skb->len;
spin_unlock_bh(&vvs->rx_lock);
/* sk_lock is held by caller so no one else can dequeue. * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
&msg->msg_iter, bytes); if (err) return err;
spin_lock_bh(&vvs->rx_lock);
}
total += skb->len;
hdr = virtio_vsock_hdr(skb);
if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) { if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
msg->msg_flags |= MSG_EOR;
/* Normally packets are associated with a socket. There may be no socket if an * attempt was made to connect to a socket that does not exist.
*/ staticint virtio_transport_reset_no_sock(conststruct virtio_transport *t, struct sk_buff *skb)
{ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
.type = le16_to_cpu(hdr->type),
.reply = true,
}; struct sk_buff *reply;
/* Send RST only if the original pkt is not a RST pkt */ if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST) return 0;
/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *trans = vsk->trans;

	/* The socket is closing and being removed, so nobody else can
	 * touch the rx queue anymore; taking rx_lock is unnecessary.
	 */
	__skb_queue_purge(&trans->rx_queue);

	vsock_remove_sock(vsk);
}
if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count++;
/* Try to copy small packets into the buffer of last packet queued, * to avoid wasting memory queueing the entire buffer with a small * payload.
*/ if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) { struct virtio_vsock_hdr *last_hdr; struct sk_buff *last_skb;
/* If there is space in the last packet queued, we copy the * new packet in its buffer. We avoid this if the last packet * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is * delimiter of SEQPACKET message, so 'pkt' is the first packet * of a new message.
*/ if (skb->len < skb_tailroom(last_skb) &&
!(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
free_pkt = true;
last_hdr->flags |= hdr->flags;
le32_add_cpu(&last_hdr->len, len); goto out;
}
}
__skb_queue_tail(&vvs->rx_queue, skb);
out:
spin_unlock_bh(&vvs->rx_lock); if (free_pkt)
kfree_skb(skb);
}
switch (le16_to_cpu(hdr->op)) { case VIRTIO_VSOCK_OP_RW:
virtio_transport_recv_enqueue(vsk, skb);
vsock_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
virtio_transport_send_credit_update(vsk); break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
sk->sk_write_space(sk); break; case VIRTIO_VSOCK_OP_SHUTDOWN: if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
vsk->peer_shutdown |= RCV_SHUTDOWN; if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
vsk->peer_shutdown |= SEND_SHUTDOWN; if (vsk->peer_shutdown == SHUTDOWN_MASK) { if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) {
(void)virtio_transport_reset(vsk, NULL);
virtio_transport_do_close(vsk, true);
} /* Remove this socket anyway because the remote peer sent * the shutdown. This way a new connection will succeed * if the remote peer uses the same source port, * even if the old socket is still unreleased, but now disconnected.
*/
vsock_remove_sock(vsk);
} if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
sk->sk_state_change(sk); break; case VIRTIO_VSOCK_OP_RST:
virtio_transport_do_close(vsk, true); break; default:
err = -EINVAL; break;
}
/* Listener sockets are not associated with any transport, so we are * not able to take the state to see if there is space available in the * remote peer, but since they are only used to receive requests, we * can assume that there is always space available in the other peer.
*/ if (!vvs) returntrue;
/* buf_alloc and fwd_cnt is always included in the hdr */
spin_lock_bh(&vvs->tx_lock);
vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
space_available = virtio_transport_has_space(vsk);
spin_unlock_bh(&vvs->tx_lock); return space_available;
}
if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
virtio_transport_reset_no_sock(t, skb); return -EINVAL;
}
if (sk_acceptq_is_full(sk)) {
virtio_transport_reset_no_sock(t, skb); return -ENOMEM;
}
/* __vsock_release() might have already flushed accept_queue. * Subsequent enqueues would lead to a memory leak.
*/ if (sk->sk_shutdown == SHUTDOWN_MASK) {
virtio_transport_reset_no_sock(t, skb); return -ESHUTDOWN;
}
ret = vsock_assign_transport(vchild, vsk); /* Transport assigned (looking at remote_addr) must be the same * where we received the request.
*/ if (ret || vchild->transport != &t->transport) {
release_sock(child);
virtio_transport_reset_no_sock(t, skb);
sock_put(child); return ret;
}
if (virtio_transport_space_update(child, skb))
child->sk_write_space(child);
if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
(void)virtio_transport_reset_no_sock(t, skb); goto free_pkt;
}
/* The socket must be in connected or bound table * otherwise send reset back
*/
sk = vsock_find_connected_socket(&src, &dst); if (!sk) {
sk = vsock_find_bound_socket(&dst); if (!sk) {
(void)virtio_transport_reset_no_sock(t, skb); goto free_pkt;
}
}
if (!skb_set_owner_sk_safe(skb, sk)) {
WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n"); goto free_pkt;
}
vsk = vsock_sk(sk);
lock_sock(sk);
/* Check if sk has been closed or assigned to another transport before * lock_sock (note: listener sockets are not assigned to any transport)
*/ if (sock_flag(sk, SOCK_DONE) ||
(sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
(void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk);
sock_put(sk); goto free_pkt;
}
/* Update CID in case it has changed after a transport reset event */ if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
vsk->local_addr.svm_cid = dst.svm_cid;
if (space_available)
sk->sk_write_space(sk);
switch (sk->sk_state) { case TCP_LISTEN:
virtio_transport_recv_listen(sk, skb, t);
kfree_skb(skb); break; case TCP_SYN_SENT:
virtio_transport_recv_connecting(sk, skb);
kfree_skb(skb); break; case TCP_ESTABLISHED:
virtio_transport_recv_connected(sk, skb); break; case TCP_CLOSING:
virtio_transport_recv_disconnecting(sk, skb);
kfree_skb(skb); break; default:
(void)virtio_transport_reset_no_sock(t, skb);
kfree_skb(skb); break;
}
release_sock(sk);
/* Release refcnt obtained when we fetched this socket out of the * bound or connected list.
*/
sock_put(sk); return;
/* Remove skbs found in a queue that have a vsk that matches. * * Each skb is freed. * * Returns the count of skbs that were reply packets.
*/ int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
{ struct sk_buff_head freeme; struct sk_buff *skb, *tmp; int cnt = 0;
skb_queue_head_init(&freeme);
spin_lock_bh(&queue->lock);
skb_queue_walk_safe(queue, skb, tmp) { if (vsock_sk(skb->sk) != vsk) continue;
int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{ struct virtio_vsock_sock *vvs = vsk->trans; struct sock *sk = sk_vsock(vsk); struct virtio_vsock_hdr *hdr; struct sk_buff *skb;
u32 pkt_len; int off = 0; int err;
spin_lock_bh(&vvs->rx_lock); /* Use __skb_recv_datagram() for race-free handling of the receive. It * works for types other than dgrams.
*/
skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err); if (!skb) {
spin_unlock_bh(&vvs->rx_lock); return err;
}
hdr = virtio_vsock_hdr(skb); if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count--;
int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
{ struct virtio_vsock_sock *vvs = vsk->trans; bool send_update;
spin_lock_bh(&vvs->rx_lock);
/* If number of available bytes is less than new SO_RCVLOWAT value, * kick sender to send more data, because sender may sleep in its * 'send()' syscall waiting for enough space at our side. Also * don't send credit update when peer already knows actual value - * such transmission will be useless.
*/
send_update = (vvs->rx_bytes < val) &&
(vvs->fwd_cnt != vvs->last_fwd_cnt);
spin_unlock_bh(&vvs->rx_lock);
if (send_update) { int err;
err = virtio_transport_send_credit_update(vsk); if (err < 0) return err;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder die Vollständigkeit noch die Richtigkeit
noch die Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.