for (i = 0; i < CPU_SETSIZE; i++) { if (CPU_ISSET(i, set))
done += snprintf(&buf[done], len - done, "%d ", i);
}
}
/*
 * Restrict @set to its first (lowest-numbered) CPU.
 *
 * Only the first populated CPU is kept for now; every later one is
 * cleared.  In the future, automatic affinity selection can be tried.
 *
 * Fix: the original read "staticvoid" (fused tokens), which does not
 * compile; the logic itself is unchanged.
 */
static void ublk_adjust_affinity(cpu_set_t *set)
{
	int j, kept = 0;

	for (j = 0; j < CPU_SETSIZE; j++) {
		if (!CPU_ISSET(j, set))
			continue;
		if (!kept) {
			/* first set CPU: keep it */
			kept = 1;
			continue;
		}
		CPU_CLR(j, set);
	}
}
/*
 * ublk_ctrl_get_affinity(): fetch the per-queue CPU affinity masks from the
 * kernel into a freshly malloc()ed array of cpu_set_t (one entry per hw
 * queue), trimming each mask via ublk_adjust_affinity().
 *
 * NOTE(review): this region appears corrupted by extraction:
 *  - "staticint" is missing a space and will not compile as-is;
 *  - the function never stores the buffer through ptr_buf nor returns on
 *    success -- an expected "*ptr_buf = buf; return 0;" tail seems missing;
 *  - the loop over info->nr_hw_queues / affinity[] below uses identifiers
 *    not declared here and looks like a fragment spliced in from a
 *    different (affinity-dumping) function whose header is not visible.
 * Reconstruct against the upstream source before relying on this text.
 */
/* Caller must free the allocated buffer */ staticint ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
{ struct ublk_ctrl_cmd_data data = {
.cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
.flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
};
cpu_set_t *buf; int i, ret;
/* one cpu_set_t per hardware queue; the caller owns (and frees) the array */
buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues); if (!buf) return -ENOMEM;
for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
data.data[0] = i;
data.len = sizeof(cpu_set_t);
/* NOTE(review): casting a pointer straight to __u64 can warn on 32-bit
 * builds; confirm whether an intermediate (unsigned long) cast is wanted. */
data.addr = (__u64)&buf[i];
ret = __ublk_ctrl_cmd(ctrl_dev, &data); if (ret < 0) {
free(buf); return ret;
}
ublk_adjust_affinity(&buf[i]);
}
/* NOTE(review): from here down, info/affinity are undeclared above -- this
 * looks like part of another function (per-queue affinity printout). */
for (i = 0; i < info->nr_hw_queues; i++) {
ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
printf("\tqueue %u: affinity(%s)\n",
i, buf);
}
free(affinity);
}
/* only freed io can be issued */ if (!(io->flags & UBLKS_IO_FREE)) return 0;
/* * we issue because we need either fetching or committing or * getting data
*/ if (!(io->flags &
(UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA))) return 0;
staticvoid ublk_submit_fetch_commands(struct ublk_thread *t)
{ struct ublk_queue *q; struct ublk_io *io; int i = 0, j = 0;
if (t->dev->per_io_tasks) { /* * Lexicographically order all the (qid,tag) pairs, with * qid taking priority (so (1,0) > (0,1)). Then make * this thread the daemon for every Nth entry in this * list (N is the number of threads), starting at this * thread's index. This ensures that each queue is * handled by as many ublk server threads as possible, * so that load that is concentrated on one or a few * queues can make use of all ublk server threads.
*/ conststruct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info; int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth; for (i = t->idx; i < nr_ios; i += t->dev->nthreads) { int q_id = i / dinfo->queue_depth; int tag = i % dinfo->queue_depth;
q = &t->dev->q[q_id];
io = &q->ios[tag];
io->buf_index = j++;
ublk_queue_io_cmd(t, io);
}
} else { /* * Service exclusively the queue whose q_id matches our * thread index.
*/ struct ublk_queue *q = &t->dev->q[t->idx]; for (i = 0; i < q->q_depth; i++) {
io = &q->ios[i];
io->buf_index = i;
ublk_queue_io_cmd(t, io);
}
}
}
if (!fetch) {
t->state |= UBLKS_T_STOPPING;
io->flags &= ~UBLKS_IO_NEED_FETCH_RQ;
}
if (cqe->res == UBLK_IO_RES_OK) {
assert(tag < q->q_depth); if (q->tgt_ops->queue_io)
q->tgt_ops->queue_io(t, q, tag);
} elseif (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE;
ublk_queue_io_cmd(t, io);
} else { /* * COMMIT_REQ will be completed immediately since no fetching * piggyback is required. * * Marking IO_FREE only, then this io won't be issued since * we only issue io with (UBLKS_IO_FREE | UBLKSRV_NEED_*) *
* */
io->flags = UBLKS_IO_FREE;
}
}
staticvoid *ublk_io_handler_fn(void *data)
{ struct ublk_thread_info *info = data; struct ublk_thread *t = &info->dev->threads[info->idx]; int dev_id = info->dev->dev_info.dev_id; int ret;
t->dev = info->dev;
t->idx = info->idx;
ret = ublk_thread_init(t, info->extra_flags); if (ret) {
ublk_err("ublk dev %d thread %u init failed\n",
dev_id, t->idx); return NULL;
} /* IO perf is sensitive with queue pthread affinity on NUMA machine*/ if (info->affinity)
ublk_thread_set_sched_affinity(t, info->affinity);
sem_post(info->ready);
tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads); if (!tinfo) return -ENOMEM;
sem_init(&ready, 0, 0);
ret = ublk_dev_prep(ctx, dev); if (ret) return ret;
ret = ublk_ctrl_get_affinity(dev, &affinity_buf); if (ret) return ret;
if (ctx->auto_zc_fallback)
extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; if (ctx->no_ublk_fixed_fd)
extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD;
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
dev->q[i].q_id = i;
ret = ublk_queue_init(&dev->q[i], extra_flags); if (ret) {
ublk_err("ublk dev %d queue %d init queue failed\n",
dinfo->dev_id, i); goto fail;
}
}
for (i = 0; i < dev->nthreads; i++) {
tinfo[i].dev = dev;
tinfo[i].idx = i;
tinfo[i].ready = &ready;
tinfo[i].extra_flags = extra_flags;
/* * If threads are not tied 1:1 to queues, setting thread * affinity based on queue affinity makes little sense. * However, thread CPU affinity has significant impact * on performance, so to compare fairly, we'll still set * thread CPU affinity based on queue affinity where * possible.
*/ if (dev->nthreads == dinfo->nr_hw_queues)
tinfo[i].affinity = &affinity_buf[i];
pthread_create(&dev->threads[i].thread, NULL,
ublk_io_handler_fn,
&tinfo[i]);
}
for (i = 0; i < dev->nthreads; i++)
sem_wait(&ready);
free(tinfo);
free(affinity_buf);
/* everything is fine now, start us */ if (ctx->recovery)
ret = ublk_ctrl_end_user_recovery(dev, getpid()); else {
ublk_set_parameters(dev);
ret = ublk_ctrl_start_dev(dev, getpid());
} if (ret < 0) {
ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); goto fail;
}
ublk_ctrl_get_info(dev); if (ctx->fg)
ublk_ctrl_dump(dev); else
ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
/* wait until we are terminated */ for (i = 0; i < dev->nthreads; i++)
pthread_join(dev->threads[i].thread, &thread_ret);
fail: for (i = 0; i < dinfo->nr_hw_queues; i++)
ublk_queue_deinit(&dev->q[i]);
ublk_dev_unprep(dev);
ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
return ret;
}
/*
 * wait_ublk_dev(): block (via inotify) until an event in @evt_mask fires
 * for the device node named by @path, or presumably until @timeout expires.
 *
 * NOTE(review): this region appears corrupted by extraction: the code that
 * creates the inotify fd, adds the watch (wd), declares `buffer`/`i`, and
 * runs the poll()/timeout loop is missing between the declarations and the
 * read() below; `timeout` is otherwise unused here.  The extra closing
 * brace before rm_watch: closes that missing outer loop.  Also note the
 * fused tokens "staticint"/"constchar".  Reconstruct against the upstream
 * source before relying on this text.
 */
staticint wait_ublk_dev(constchar *path, int evt_mask, unsigned timeout)
{ #define EV_SIZE (sizeof(struct inotify_event)) #define EV_BUF_LEN (128 * (EV_SIZE + 16)) struct pollfd pfd; int fd, wd; int ret = -EINVAL; constchar *dev_name = basename(path);
/* drain whatever inotify has queued; ret becomes the byte count read */
ret = read(fd, buffer, EV_BUF_LEN); if (ret < 0) {
ublk_err("%s: read inotify fd failed\n", __func__); goto rm_watch;
}
/* walk the variable-length inotify_event records packed in the buffer */
while (i < ret) { struct inotify_event *event = (struct inotify_event *)&buffer[i];
ublk_dbg(UBLK_DBG_DEV, "%s: inotify event %x %s\n",
__func__, event->mask, event->name); if (event->mask & evt_mask) { if (!strcmp(event->name, dev_name)) {
/* our device saw the awaited event: success */
ret = 0; goto rm_watch;
}
}
i += EV_SIZE + event->len;
}
}
rm_watch:
inotify_rm_watch(fd, wd);
fail:
close(fd); return ret;
}
staticint ublk_stop_io_daemon(conststruct ublk_dev *dev)
{ int daemon_pid = dev->dev_info.ublksrv_pid; int dev_id = dev->dev_info.dev_id; char ublkc[64]; int ret = 0;
if (daemon_pid < 0) return 0;
/* daemon may be dead already */ if (kill(daemon_pid, 0) < 0) goto wait;
/* ublk char device may be gone already */ if (access(ublkc, F_OK) != 0) goto wait;
/* Wait until ublk char device is closed, when the daemon is shutdown */
ret = wait_ublk_dev(ublkc, IN_CLOSE, 10); /* double check and since it may be closed before starting inotify */ if (ret == -ETIMEDOUT)
ret = kill(daemon_pid, 0) < 0;
wait:
waitpid(daemon_pid, NULL, 0);
ublk_dbg(UBLK_DBG_DEV, "%s: pid %d dev_id %d ret %d\n",
__func__, daemon_pid, dev_id, ret);
ops = ublk_find_tgt(tgt_type); if (!ops) {
ublk_err("%s: no such tgt type, type %s\n",
__func__, tgt_type);
ret = -ENODEV; goto fail;
}
if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
__func__, nr_queues, depth);
ret = -EINVAL; goto fail;
}
/* default to 1:1 threads:queues if nthreads is unspecified */ if (!nthreads)
nthreads = nr_queues;
if (nthreads > UBLK_MAX_THREADS) {
ublk_err("%s: %u is too many threads (max %u)\n",
__func__, nthreads, UBLK_MAX_THREADS);
ret = -EINVAL; goto fail;
}
if (nthreads != nr_queues && !ctx->per_io_tasks) {
ublk_err("%s: threads %u must be same as queues %u if " "not using per_io_tasks\n",
__func__, nthreads, nr_queues);
ret = -EINVAL; goto fail;
}
dev = ublk_ctrl_init(); if (!dev) {
ublk_err("%s: can't alloc dev id %d, type %s\n",
__func__, dev_id, tgt_type);
ret = -ENOMEM; goto fail;
}
/* kernel doesn't support get_features */
ret = ublk_ctrl_get_features(dev, &features); if (ret < 0) {
ret = -EINVAL; goto fail;
}
if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) {
ret = -ENOTSUP; goto fail;
}
for (i = 0; i < MAX_BACK_FILES; i++) { if (ctx->files[i]) {
strcpy(dev->tgt.backing_file[i], ctx->files[i]);
dev->tgt.nr_backing_files++;
}
}
if (ctx->recovery)
ret = ublk_ctrl_start_user_recovery(dev); else
ret = ublk_ctrl_add_dev(dev); if (ret < 0) {
ublk_err("%s: can't add dev id %d, type %s ret %d\n",
__func__, dev_id, tgt_type, ret); goto fail;
}
ret = ublk_start_daemon(ctx, dev);
ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\b", ret); if (ret < 0)
ublk_ctrl_del_dev(dev);
fail: if (ret < 0)
ublk_send_dev_event(ctx, dev, -1); if (dev)
ublk_ctrl_deinit(dev); return ret;
}
staticint __cmd_dev_list(struct dev_ctx *ctx);
/*
 * cmd_dev_add(): create a ublk device, double-fork()ing so the io daemon
 * detaches from the foreground shell, then report the outcome.
 *
 * NOTE(review): this region appears corrupted by extraction: the leading
 * foreground check, shm/eventfd setup, and the first fork()
 * ("res = fork(); if (res == 0) {") are missing -- which is why `res2` is
 * used without a declaration and why an unmatched closing brace precedes
 * the "elseif (res > 0)" arm below.  "staticint"/"elseif" are fused
 * tokens.  Reconstruct against the upstream source.
 */
staticint cmd_dev_add(struct dev_ctx *ctx)
{ int res;
/* (presumably) first-fork child: detach into a new session */
setsid();
res2 = fork(); if (res2 == 0) { /* prepare for detaching */
close(STDIN_FILENO);
close(STDOUT_FILENO);
close(STDERR_FILENO);
run:
res = __cmd_dev_add(ctx); return res;
} else { /* detached from the foreground task */ exit(EXIT_SUCCESS);
}
} elseif (res > 0) {
/* parent: wait for the daemon to publish its dev id over the eventfd */
uint64_t id; int exit_code = EXIT_FAILURE;
res = read(ctx->_evtfd, &id, sizeof(id));
close(ctx->_evtfd); if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
/* the published value is offset by one (see the "id - 1" below) */
ctx->dev_id = id - 1; if (__cmd_dev_list(ctx) >= 0)
exit_code = EXIT_SUCCESS;
}
shmdt(ctx->shadow_dev);
shmctl(ctx->_shmid, IPC_RMID, NULL); /* wait for child and detach from it */
wait(NULL); if (exit_code == EXIT_FAILURE)
ublk_err("%s: command failed\n", __func__); exit(exit_code);
} else { exit(EXIT_FAILURE);
}
}
staticint __cmd_dev_del(struct dev_ctx *ctx)
{ int number = ctx->dev_id; struct ublk_dev *dev; int ret;
dev = ublk_ctrl_init();
dev->dev_info.dev_id = number;
ret = ublk_ctrl_get_info(dev); if (ret < 0) goto fail;
ret = ublk_ctrl_stop_dev(dev); if (ret < 0)
ublk_err("%s: stop dev %d failed ret %d\n", __func__, number, ret);
ret = ublk_stop_io_daemon(dev); if (ret < 0)
ublk_err("%s: stop daemon id %d dev %d, ret %d\n",
__func__, dev->dev_info.ublksrv_pid, number, ret);
ublk_ctrl_del_dev(dev);
fail:
ublk_ctrl_deinit(dev);
return (ret >= 0) ? 0 : ret;
}
/*
 * cmd_dev_del(): delete one device, or -- when --all is set and no
 * explicit dev_id was given -- sweep ids 0..254 and try each of them.
 */
static int cmd_dev_del(struct dev_ctx *ctx)
{
	int id;

	/* a concrete id (or no --all flag) means a single targeted delete */
	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_del(ctx);

	for (id = 0; id < 255; id++) {
		ctx->dev_id = id;
		__cmd_dev_del(ctx);
	}
	return 0;
}
ret = ublk_ctrl_get_info(dev); if (ret < 0) { if (ctx->logging)
ublk_err("%s: can't get dev info from %d: %d\n",
__func__, ctx->dev_id, ret);
} else { if (ctx->shadow_dev)
memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
ublk_ctrl_dump(dev);
}
ublk_ctrl_deinit(dev);
return ret;
}
/*
 * cmd_dev_list(): list one device, or -- for --all without an explicit
 * id -- probe ids 0..254, silencing per-device error logging for the
 * sweep.
 */
static int cmd_dev_list(struct dev_ctx *ctx)
{
	int id;

	if (ctx->dev_id >= 0 || !ctx->all)
		return __cmd_dev_list(ctx);

	ctx->logging = false;
	for (id = 0; id < 255; id++) {
		ctx->dev_id = id;
		__cmd_dev_list(ctx);
	}
	return 0;
}
/*
 * cmd_recover_help(): print usage for the "recover" sub-command by reusing
 * the create-command help with the recovery variant enabled.
 */
static void cmd_recover_help(char *exe)
{
	__cmd_create_help(exe, true);
	printf("\tPlease provide exact command line for creating this device with real dev_id\n");
	putchar('\n');
}
printf("%s del [-n dev_id] -a \n", exe);
printf("\t -a delete all devices -n delete specified device\n\n");
printf("%s list [-n dev_id] -a \n", exe);
printf("\t -a list all devices, -n list specified device, default -a \n\n");
printf("%s features\n", exe);
printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe);
printf("%s quiesce -n dev_id\n", exe); return 0;
}
/* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */ if (ctx.auto_zc_fallback &&
!((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
(ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
ublk_err("%s: auto_zc_fallback is set but neither " "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n",
__func__); return -EINVAL;
}
i = optind; while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
ctx.files[ctx.nr_files++] = argv[i++];
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.