// SPDX-License-Identifier: GPL-2.0-only
/*
 * vsock_test - vsock.ko test suite
 *
 * Copyright (C) 2017 Red Hat, Inc.
 *
 * Author: Stefan Hajnoczi <stefanha@redhat.com>
 */
/* Compare partial and full peek. */ if (memcmp(buf_half, buf_peek, sizeof(buf_half))) {
fprintf(stderr, "Partial peek data mismatch\n"); exit(EXIT_FAILURE);
}
if (seqpacket) { /* This type of socket supports MSG_TRUNC flag, * so check it with MSG_PEEK. We must get length * of the message.
*/
recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK | MSG_TRUNC, sizeof(buf_peek));
}
/* Compare full peek and normal read. */ if (memcmp(buf_peek, buf_normal, sizeof(buf_peek))) {
fprintf(stderr, "Full peek data mismatch\n"); exit(EXIT_FAILURE);
}
staticvoid test_seqpacket_msg_bounds_client(conststruct test_opts *opts)
{ unsignedlong curr_hash;
size_t max_msg_size; int page_size; int msg_count; int fd;
for (int i = 0; i < msg_count; i++) {
size_t buf_size; int flags; void *buf;
/* Use "small" buffers and "big" buffers. */ if (i & 1)
buf_size = page_size +
(rand() % (max_msg_size - page_size)); else
buf_size = 1 + (rand() % page_size);
buf = malloc(buf_size);
if (!buf) {
perror("malloc"); exit(EXIT_FAILURE);
}
memset(buf, rand() & 0xff, buf_size); /* Set at least one MSG_EOR + some random. */ if (i == (msg_count / 2) || (rand() & 1)) {
flags = MSG_EOR;
curr_hash++;
} else {
flags = 0;
}
send_buf(fd, buf, buf_size, flags, buf_size);
/* * Hash sum is computed at both client and server in * the same way: * H += hash('message data') * Such hash "controls" both data integrity and message * bounds. After data exchange, both sums are compared * using control socket, and if message bounds wasn't * broken - two values must be equal.
*/
curr_hash += hash_djb2(buf, buf_size);
free(buf);
}
if (read_overhead_ns > READ_OVERHEAD_NSEC) {
fprintf(stderr, "too much time in read(2), %lu > %i ns\n",
read_overhead_ns, READ_OVERHEAD_NSEC); exit(EXIT_FAILURE);
}
control_writeln("WAITDONE");
close(fd);
}
staticvoid test_seqpacket_timeout_server(conststruct test_opts *opts)
{ int fd;
if (getsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
&sock_buf_size, &len)) {
perror("getsockopt"); exit(EXIT_FAILURE);
}
sock_buf_size++;
/* size_t can be < unsigned long long */
buf_size = (size_t)sock_buf_size; if (buf_size != sock_buf_size) {
fprintf(stderr, "Returned BUFFER_SIZE too large\n"); exit(EXIT_FAILURE);
}
data = malloc(buf_size); if (!data) {
perror("malloc"); exit(EXIT_FAILURE);
}
send_buf(fd, data, buf_size, 0, -EMSGSIZE);
control_writeln("CLISENT");
free(data);
close(fd);
}
staticvoid test_seqpacket_bigmsg_server(conststruct test_opts *opts)
{ int fd;
for (i = 0; i < buf_size; i++) { if (valid_buf[i] != BUF_PATTERN_2) {
fprintf(stderr, "invalid pattern for 'valid_buf' at %i, expected %hhX, got %hhX\n",
i, BUF_PATTERN_2, valid_buf[i]); exit(EXIT_FAILURE);
}
}
/* At this point, server sent 1 byte. */
fds.fd = fd;
poll_flags = POLLIN | POLLRDNORM;
fds.events = poll_flags;
/* Try to wait for 1 sec. */ if (poll(&fds, 1, 1000) < 0) {
perror("poll"); exit(EXIT_FAILURE);
}
/* poll() must return nothing. */ if (fds.revents) {
fprintf(stderr, "Unexpected poll result %hx\n",
fds.revents); exit(EXIT_FAILURE);
}
/* Tell server to send rest of data. */
control_writeln("CLNSENT");
/* Poll for data. */ if (poll(&fds, 1, 10000) < 0) {
perror("poll"); exit(EXIT_FAILURE);
}
/* Only these two bits are expected. */ if (fds.revents != poll_flags) {
fprintf(stderr, "Unexpected poll result %hx\n",
fds.revents); exit(EXIT_FAILURE);
}
/* Use MSG_DONTWAIT, if call is going to wait, EAGAIN * will be returned.
*/
recv_buf(fd, buf, sizeof(buf), MSG_DONTWAIT, RCVLOWAT_BUF_SIZE);
if (stream) { /* For SOCK_STREAM we must continue reading. */
expected_ret = sizeof(data);
} else { /* For SOCK_SEQPACKET socket's queue must be empty. */
expected_ret = -EAGAIN;
}
/* When the other peer calls shutdown(SHUT_RD), there is a chance that * the send() call could occur before the message carrying the close * information arrives over the transport. In such cases, the send() * might still succeed. To avoid this race, let's retry the send() call * a few times, ensuring the test is more reliable.
*/
timeout_begin(TIMEOUT); while(1) {
res = send(fd, "A", 1, 0); if (res == -1 && errno != EINTR) break;
/* Sleep a little before trying again to avoid flooding the * other peer and filling its receive buffer, causing * false-negative.
*/
timeout_usleep(SEND_SLEEP_USEC);
timeout_check("send");
}
timeout_end();
if (client_fd < 0) {
perror("accept"); exit(EXIT_FAILURE);
}
/* Waiting for remote peer to close connection */
vsock_wait_remote_close(client_fd);
}
close(listen_fd);
}
staticvoid test_double_bind_connect_client(conststruct test_opts *opts)
{ int i, client_fd;
for (i = 0; i < 2; i++) { /* Wait until server is ready to accept a new connection */
control_expectln("LISTENING");
/* We use 'peer_port + 1' as "some" port for the 'bind()' * call. It is safe for overflow, but must be considered, * when running multiple test applications simultaneously * where 'peer-port' argument differs by 1.
*/
client_fd = vsock_bind_connect(opts->peer_cid, opts->peer_port,
opts->peer_port + 1, SOCK_STREAM);
close(client_fd);
}
}
#define MSG_BUF_IOCTL_LEN 64 staticvoid test_unsent_bytes_server(conststruct test_opts *opts, int type)
{ unsignedchar buf[MSG_BUF_IOCTL_LEN]; int client_fd;
/* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though * the "RECEIVED" message means that the other side has received the * data, there can be a delay in our kernel before updating the "unsent * bytes" counter. vsock_wait_sent() will repeat SIOCOUTQ until it * returns 0.
*/ if (!vsock_wait_sent(fd))
fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
close(fd);
}
staticvoid test_unread_bytes_server(conststruct test_opts *opts, int type)
{ unsignedchar buf[MSG_BUF_IOCTL_LEN]; int client_fd;
control_expectln("SENT"); /* The data has arrived but has not been read. The expected is * MSG_BUF_IOCTL_LEN.
*/ if (!vsock_ioctl_int(fd, SIOCINQ, MSG_BUF_IOCTL_LEN)) {
fprintf(stderr, "Test skipped, SIOCINQ not supported.\n"); goto out;
}
recv_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); /* All data has been consumed, so the expected is 0. */
vsock_ioctl_int(fd, SIOCINQ, 0);
/* 128KB. NOTE(review): judging by the name, this is the buffer size of the
 * SO_RCVLOWAT credit update test - confirm at the use sites.
 */
#define RCVLOWAT_CREDIT_UPD_BUF_SIZE	(1024 * 128)

/* This define is the same as in 'include/linux/virtio_vsock.h':
 * it is used to decide when to send credit update message during
 * reading from rx queue of a socket. Value and its usage in
 * kernel is important for this test.
 */
#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE	(1024 * 64)
if (low_rx_bytes_test) { /* Set new SO_RCVLOWAT here. This enables sending credit * update when number of bytes if our rx queue become < * SO_RCVLOWAT value.
*/
recv_buf_size = 1 + VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
/* Send one dummy byte here, because 'setsockopt()' above also * sends special packet which tells sender to update our buffer * size. This 'send_byte()' will serialize such packet with data * reads in a loop below. Sender starts transmission only when * it receives this single byte.
*/
send_byte(fd, 1, 0);
buf = malloc(buf_size); if (!buf) {
perror("malloc"); exit(EXIT_FAILURE);
}
/* Wait until there will be 128KB of data in rx queue. */ while (1) {
ssize_t res;
res = recv(fd, buf, buf_size, MSG_PEEK); if (res == buf_size) break;
/* There is 128KB of data in the socket's rx queue, dequeue first * 64KB, credit update is sent if 'low_rx_bytes_test' == true. * Otherwise, credit update is sent in 'if (!low_rx_bytes_test)'.
*/
recv_buf_size = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
recv_buf(fd, buf, recv_buf_size, 0, recv_buf_size);
/* This 'poll()' will return once we receive last byte * sent by client.
*/ if (poll(&fds, 1, -1) < 0) {
perror("poll"); exit(EXIT_FAILURE);
}
if (fds.revents & POLLERR) {
fprintf(stderr, "'poll()' error\n"); exit(EXIT_FAILURE);
}
if (fds.revents & (POLLIN | POLLRDNORM)) {
recv_buf(fd, buf, recv_buf_size, MSG_DONTWAIT, recv_buf_size);
} else { /* These flags must be set, as there is at * least 64KB of data ready to read.
*/
fprintf(stderr, "POLLIN | POLLRDNORM expected\n"); exit(EXIT_FAILURE);
}
/* The goal of test leak_acceptq is to stress the race between connect() and
 * close(listener). Implementation of client/server loops boils down to:
 *
 *	client				server
 *	------				------
 *	write(CONTINUE)
 *					expect(CONTINUE)
 *					listen()
 *					write(LISTENING)
 *	expect(LISTENING)
 *	connect()			close()
 */
#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */
staticvoid test_stream_leak_acceptq_client(conststruct test_opts *opts)
{
time_t tout; int fd;
tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC; do {
control_writeulong(CONTROL_CONTINUE);
fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd >= 0)
close(fd);
} while (current_nsec() < tout);
control_writeulong(CONTROL_DONE);
}
/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ staticvoid test_stream_leak_acceptq_server(conststruct test_opts *opts)
{ int fd;
/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ staticvoid test_stream_msgzcopy_leak_errq_client(conststruct test_opts *opts)
{ struct pollfd fds = { 0 }; int fd;
/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path,
 * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb()
 * by hitting net.core.optmem_max limit in sock_omalloc(), specifically
 *
 *   vsock_connectible_sendmsg
 *     virtio_transport_stream_enqueue
 *       virtio_transport_send_pkt_info
 *         virtio_transport_init_zcopy_skb
 *         . msg_zerocopy_realloc
 *         .   msg_zerocopy_alloc
 *         .     sock_omalloc
 *         .       sk_omem_alloc + size > sysctl_optmem_max
 *         return -ENOMEM
 *
 * We abuse the implementation detail of net/socket.c:____sys_sendmsg().
 * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to
 * fetch user-provided control data.
 *
 * While this approach works for now, it relies on assumptions regarding the
 * implementation and configuration (for example, order of net.core.optmem_max
 * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more
 * resilient testing could be implemented by leveraging the Fault injection
 * framework (CONFIG_FAULT_INJECTION), e.g.
 *
 *   client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait
 *   client# echo 0 > /sys/kernel/debug/failslab/verbose
 *
 *   void client(const struct test_opts *opts)
 *   {
 *	char buf[16];
 *	int f, s, i;
 *
 *	f = open("/proc/self/fail-nth", O_WRONLY);
 *
 *	for (i = 1; i < 32; i++) {
 *		control_writeulong(CONTROL_CONTINUE);
 *
 *		s = vsock_stream_connect(opts->peer_cid, opts->peer_port);
 *		enable_so_zerocopy_check(s);
 *
 *		sprintf(buf, "%d", i);
 *		write(f, buf, strlen(buf));
 *
 *		send(s, &(char){ 0 }, 1, MSG_ZEROCOPY);
 *
 *		write(f, "0", 1);
 *		close(s);
 *	}
 *
 *	control_writeulong(CONTROL_DONE);
 *	close(f);
 *   }
 *
 *   void server(const struct test_opts *opts)
 *   {
 *	int fd;
 *
 *	while (control_readulong() == CONTROL_CONTINUE) {
 *		fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
 *		vsock_wait_remote_close(fd);
 *		close(fd);
 *	}
 *   }
 *
 * Refer to Documentation/fault-injection/fault-injection.rst.
 */
/* Each directive must sit on its own line: a second '#define' on the same
 * line is absorbed into the first macro's replacement list.
 */
#define MAX_PAGE_ORDER	10	/* usually */
#define PAGE_SIZE	4096
/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ staticvoid test_stream_msgzcopy_leak_zcskb_client(conststruct test_opts *opts)
{
size_t optmem_max, ctl_len, chunk_size; struct msghdr msg = { 0 }; struct iovec iov; char *chunk; int fd, res;
FILE *f;
f = fopen("/proc/sys/net/core/optmem_max", "r"); if (!f) {
perror("fopen(optmem_max)"); exit(EXIT_FAILURE);
}
staticbool test_stream_transport_uaf(int cid)
{ int sockets[MAX_PORT_RETRIES]; struct sockaddr_vm addr;
socklen_t alen; int fd, i, c; bool ret;
/* Probe for a transport by attempting a local CID bind. Unavailable * transport (or more specifically: an unsupported transport/CID * combination) results in EADDRNOTAVAIL, other errnos are fatal.
*/
fd = vsock_bind_try(cid, VMADDR_PORT_ANY, SOCK_STREAM); if (fd < 0) { if (errno != EADDRNOTAVAIL) {
perror("Unexpected bind() errno"); exit(EXIT_FAILURE);
}
/* Drain the autobind pool; see __vsock_bind_connectible(). */ for (i = 0; i < MAX_PORT_RETRIES; ++i)
sockets[i] = vsock_bind(cid, ++addr.svm_port, SOCK_STREAM);
close(fd);
/* Setting SOCK_NONBLOCK makes connect() return soon after * (re-)assigning the transport. We are not connecting to anything * anyway, so there is no point entering the main loop in * vsock_connect(); waiting for timeout, checking for signals, etc.
*/
fd = socket(AF_VSOCK, SOCK_STREAM | SOCK_NONBLOCK, 0); if (fd < 0) {
perror("socket"); exit(EXIT_FAILURE);
}
/* Assign transport, while failing to autobind. Autobind pool was * drained, so EADDRNOTAVAIL coming from __vsock_bind_connectible() is * expected. * * One exception is ENODEV which is thrown by vsock_assign_transport(), * i.e. before vsock_auto_bind(), when the only transport loaded is * vhost.
*/ if (!connect(fd, (struct sockaddr *)&addr, alen)) {
fprintf(stderr, "Unexpected connect() success\n"); exit(EXIT_FAILURE);
} if (errno == ENODEV && cid == VMADDR_CID_HOST) {
ret = false; goto cleanup;
} if (errno != EADDRNOTAVAIL) {
perror("Unexpected connect() errno"); exit(EXIT_FAILURE);
}
/* Reassign transport, triggering old transport release and * (potentially) unbinding of an unbound socket. * * Vulnerable system may crash now.
*/ for (c = VMADDR_CID_HYPERVISOR; c <= VMADDR_CID_HOST + 1; ++c) { if (c != cid) {
addr.svm_cid = c;
(void)connect(fd, (struct sockaddr *)&addr, alen);
}
}
ret = true;
cleanup:
close(fd); while (i--)
close(sockets[i]);
return ret;
}
/* Test attempts to trigger a transport release for an unbound socket. This can * lead to a reference count mishandling.
*/ staticvoid test_stream_transport_uaf_client(conststruct test_opts *opts)
{ bool tested = false; int cid, tr;
/* Print a warning if there is a G2H transport loaded. * This is on a best effort basis because VMCI can be either G2H and H2G, and there is * no easy way to understand it. * The bug we are testing only appears when G2H transports are not loaded. * This is because `vsock_assign_transport`, when using CID 0, assigns a G2H transport * to vsk->transport. If none is available it is set to NULL, causing the null-ptr-deref.
*/ if (tr & TRANSPORTS_G2H)
fprintf(stderr, "G2H Transport detected. This test will not fail.\n");
ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid); if (ret) {
fprintf(stderr, "pthread_create: %d\n", ret); exit(EXIT_FAILURE);
}
control_expectln("LISTENING");
tout = current_nsec() + TRANSPORT_CHANGE_TIMEOUT * NSEC_PER_SEC; do { struct sockaddr_vm sa = {
.svm_family = AF_VSOCK,
.svm_cid = opts->peer_cid,
.svm_port = opts->peer_port,
}; bool send_control = false; int s;
s = socket(AF_VSOCK, SOCK_STREAM, 0); if (s < 0) {
perror("socket"); exit(EXIT_FAILURE);
}
ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); /* The connect can fail due to signals coming from the thread, * or because the receiver connection queue is full. * Ignoring also the latter case because there is no way * of synchronizing client's connect and server's accept when * connect(s) are constantly being interrupted by signals.
*/ if (ret == -1 && (errno != EINTR && errno != ECONNRESET)) {
perror("connect"); exit(EXIT_FAILURE);
}
/* Notify the server if the connect() is successful or the * receiver connection queue is full, so it will do accept() * to drain it.
*/ if (!ret || errno == ECONNRESET)
send_control = true;
/* Set CID to 0 cause a transport change. */
sa.svm_cid = 0;
/* There is a case where this will not fail: * if the previous connect() is interrupted while the * connection request is already sent, this second * connect() will wait for the response.
*/
ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); if (!ret || errno == ECONNRESET)
send_control = true;
close(s);
if (send_control)
control_writeulong(CONTROL_CONTINUE);
} while (current_nsec() < tout);
control_writeulong(CONTROL_DONE);
ret = pthread_cancel(thread_id); if (ret) {
fprintf(stderr, "pthread_cancel: %d\n", ret); exit(EXIT_FAILURE);
}
ret = pthread_join(thread_id, NULL); if (ret) {
fprintf(stderr, "pthread_join: %d\n", ret); exit(EXIT_FAILURE);
}
if (signal(SIGUSR1, old_handler) == SIG_ERR) {
perror("signal"); exit(EXIT_FAILURE);
}
}
staticvoid test_stream_transport_change_server(conststruct test_opts *opts)
{ int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
/* Set the socket to be nonblocking because connects that have been interrupted * (EINTR) can fill the receiver's accept queue anyway, leading to connect failure. * As of today (6.15) in such situation there is no way to understand, from the * client side, if the connection has been queued in the server or not.
*/ if (fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK) < 0) {
perror("fcntl"); exit(EXIT_FAILURE);
}
control_writeln("LISTENING");
while (control_readulong() == CONTROL_CONTINUE) { /* Must accept the connection, otherwise the `listen` * queue will fill up and new connections will fail. * There can be more than one queued connection, * clear them all.
*/ while (true) { int client = accept(s, NULL, NULL);
if (client < 0) { if (errno == EAGAIN) break;
perror("accept"); exit(EXIT_FAILURE);
}
close(client);
}
}
close(s);
}
staticvoid test_stream_linger_client(conststruct test_opts *opts)
{ int fd;
/* Print the command line help text to stderr and terminate the process with
 * EXIT_FAILURE. DEFAULT_PEER_PORT is substituted into the --peer-port line.
 * This function does not return.
 */
static void usage(void)
{
	/* The synopsis line lists --pick as well, matching the Options list. */
	fprintf(stderr,
		"Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--pick=<test_id>] [--skip=<test_id>]\n"
		"\n"
		" Server: vsock_test --control-port=1234 --mode=server --peer-cid=3\n"
		" Client: vsock_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
		"\n"
		"Run vsock.ko tests. Must be launched in both guest\n"
		"and host. One side must use --mode=client and\n"
		"the other side must use --mode=server.\n"
		"\n"
		"A TCP control socket connection is used to coordinate tests\n"
		"between the client and the server. The server requires a\n"
		"listen address and the client requires an address to\n"
		"connect to.\n"
		"\n"
		"The CID of the other side must be given with --peer-cid=<cid>.\n"
		"During the test, two AF_VSOCK ports will be used: the port\n"
		"specified with --peer-port=<port> (or the default port)\n"
		"and the next one.\n"
		"\n"
		"Options:\n"
		" --help This help message\n"
		" --control-host <host> Server IP address to connect to\n"
		" --control-port <port> Server port to listen on/connect to\n"
		" --mode client|server Server or client mode\n"
		" --peer-cid <cid> CID of the other side\n"
		" --peer-port <port> AF_VSOCK port used for the test [default: %d]\n"
		" --list List of tests that will be executed\n"
		" --pick <test_id> Test ID to execute selectively;\n"
		" use multiple --pick options to select more tests\n"
		" --skip <test_id> Test ID to skip;\n"
		" use multiple --skip options to skip more tests\n",
		DEFAULT_PEER_PORT);
	exit(EXIT_FAILURE);
}
/*
 * The information on this web page has been compiled carefully and to the
 * best of our knowledge. However, neither the completeness, nor the
 * correctness, nor the quality of the information provided is guaranteed.
 *
 * Note:
 * The syntax highlighting and the measurement are still experimental.
 */