// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2018 Cambridge Greys Ltd * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com) * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
*/
/* 2001-09-28...2002-04-17 * Partition stuff by James_McMechan@hotmail.com * old style ubd by setting UBD_SHIFT to 0 * 2002-09-27...2002-10-18 massive tinkering for 2.5 * partitions have changed in 2.5 * 2003-01-29 more tinkering for 2.5.59-1 * This should now address the sysfs problems and has * the symlink for devfs to allow for booting with * the common /dev/ubd/discX/... names rather than * only /dev/ubdN/discN this version also has lots of * clean ups preparing for ubd-many. * James McMechan
*/
/* If *index_out == -1 at exit, the passed option was a general one; * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it * should not be freed on exit.
*/ staticint ubd_setup_common(char *str, int *index_out, char **error_out)
{ struct ubd *ubd_dev; struct openflags flags = global_openflags; char *file, *backing_file, *serial; int n, err = 0, i;
/*
 * Hook the "ubd" option up to ubd_setup and attach its help text.
 *
 * The synopsis line and the "<n> must be in the range" sentence carry
 * angle-bracket placeholders (<n>, <flags>, <filename>, <filename2>,
 * <serial>); without them the text does not say what goes between "ubd"
 * and "=" or which value is limited to 0..7.
 */
__setup("ubd", ubd_setup);
__uml_help(ubd_setup,
"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
" This is used to associate a device with a file in the underlying\n"
" filesystem. When specifying two filenames, the first one is the\n"
" COW name and the second is the backing file name. As separator you can\n"
" use either a ':' or a ',': the first one allows writing things like;\n"
" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
" while with a ',' the shell would not expand the 2nd '~'.\n"
" When using only one filename, UML will detect whether to treat it like\n"
" a COW file or a backing file. To override this detection, add the 'd'\n"
" flag:\n"
" ubd0d=BackingFile\n"
" Usually, there is a filesystem in the file, but \n"
" that's not required. Swap devices containing swap files can be\n"
" specified like this. Also, a file which doesn't contain a\n"
" filesystem can have its contents read in the virtual \n"
" machine by running 'dd' on the device. <n> must be in the range\n"
" 0 to 7. Appending an 'r' to the number will cause that device\n"
" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
" an 's' will cause data to be written to disk on the host immediately.\n"
" 'c' will cause the device to be treated as being shared between multiple\n"
" UMLs and file locking will be turned off - this is appropriate for a\n"
" cluster filesystem and inappropriate at almost all other times.\n\n"
" 't' will disable trim/discard support on the device (enabled by default).\n\n"
" An optional device serial number can be exposed using the serial parameter\n"
" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
" useful when a unique number should be given to the device. Note when\n"
" specifying a label, the filename2 must be also presented. It can be\n"
" an empty string, in which case the backing file is not used:\n"
" ubd0=File,,Serial\n"
);
/*
 * Handler for options starting with "udb".
 *
 * Such an option is treated as a misspelt "ubd": the only action is to
 * print a message pointing out the likely typo.
 *
 * @str: remainder of the option text following "udb"; echoed in the message.
 *
 * Always returns 1.
 */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);
	return 1;
}
/* Hook the "udb" typo-catcher up to udb_setup and attach its help text. */
__setup("udb", udb_setup);
__uml_help(udb_setup,
"udb\n"
" This option is here solely to catch ubd -> udb typos, which can be\n"
" impossible to catch visually unless you specifically look for\n"
" them. The only result of any option starting with 'udb' is an error\n"
" in the boot output.\n\n"
);
/*
 * Descriptor used on the kernel side to exchange request pointers with the
 * I/O helper thread; -1 until it has been set up.
 * Only changed by ubd_init, which is an initcall.
 */
static int thread_fd = -1;
/*
 * Read several request pointers at a time, handling fractional reads if
 * (and as) needed.
 *
 * @fd:             descriptor to read from.
 * @request_buffer: array that receives the request pointers.
 * @remainder:      side storage for the bytes of a pointer that was only
 *                  partially read; they are moved back to the start of
 *                  @request_buffer on the next call.
 * @remainder_size: in: number of bytes currently held in @remainder;
 *                  out: number of bytes stashed by this call (0 when the
 *                  data ended on a pointer boundary).
 * @max_recs:       capacity of @request_buffer in pointers.
 *
 * Returns the number of bytes of complete pointers now at the start of
 * @request_buffer (this can be 0 when only part of a pointer has arrived),
 * or the os_read_file() result unchanged when that is <= 0. In the latter
 * case @remainder and @remainder_size are left untouched so the stashed
 * bytes are retried on the next call.
 *
 * NOTE(review): this helper is also called from io_thread, whose comment
 * block says kernel facilities must not be used there, yet WARN() is
 * invoked below - confirm whether that is acceptable.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const int rec_size = sizeof(struct io_thread_req *);
	int carried = *remainder_size;
	int n = 0;
	int res;

	/* Put the leftover bytes of the previous call in front of new data. */
	if (carried > 0) {
		memmove((char *) request_buffer, (char *) remainder, carried);
		n = carried;
	}

	res = os_read_file(fd,
			   ((char *) request_buffer) + carried,
			   sizeof(struct io_thread_req *) * max_recs - carried);
	if (res <= 0)
		return res;

	n += res;

	/*
	 * Always publish the new leftover count, including 0. Leaving the
	 * old value in place after an aligned read would make the next call
	 * prepend stale bytes again and shift every following pointer.
	 */
	*remainder_size = n % rec_size;
	if (*remainder_size > 0) {
		/*
		 * Read somehow returned not a multiple of dword
		 * theoretically possible, but never observed in the
		 * wild, so read routine must be able to handle it
		 */
		WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
		memmove(remainder,
			((char *) request_buffer) + (n - *remainder_size),
			*remainder_size);
		n -= *remainder_size;
	}

	return n;
}
/*
 * Interrupt handler: keep pulling batches of request pointers from
 * thread_fd and hand each one to ubd_end_request() until the read helper
 * reports a negative value. Anything other than -EAGAIN is logged.
 *
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t ubd_intr(int irq, void *dev)
{
	int nbytes, idx;

	for (;;) {
		nbytes = bulk_req_safe_read(thread_fd, irq_req_buffer,
					    &irq_remainder,
					    &irq_remainder_size,
					    UBD_REQ_BUFFER_SIZE);
		if (nbytes < 0)
			break;

		/* A return of 0 (only part of a pointer so far) just loops. */
		for (idx = 0; idx < nbytes / sizeof(struct io_thread_req *); idx++)
			ubd_end_request((*irq_req_buffer)[idx]);
	}

	/* The loop can only be left with a negative value in nbytes. */
	if (nbytes != -EAGAIN)
		pr_err("spurious interrupt in %s, err = %d\n", __func__, nbytes);

	return IRQ_HANDLED;
}
/*
 * Handle of the I/O helper thread; NULL while none is recorded.
 * Only changed by ubd_init, which is an initcall.
 */
static struct os_helper_thread *io_td;
/*
 * Stop the I/O helper thread, if one has been recorded in io_td.
 * Registered through __uml_exitcall below.
 */
static void kill_io_thread(void)
{
	if (!io_td)
		return;

	os_kill_helper_thread(io_td);
}

__uml_exitcall(kill_io_thread);
staticinlineint ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
{ char *file; int fd; int err;
__u32 version;
__u32 align; char *backing_file;
time64_t mtime; unsignedlonglong size; int sector_size; int bitmap_offset;
if (ubd_dev->file && ubd_dev->cow.file) {
file = ubd_dev->cow.file;
err = os_file_modtime(file, &modtime); if (err < 0) {
printk(KERN_ERR "Failed to get modification time of backing " "file \"%s\", err = %d\n", file, -err); return err;
}
err = os_file_size(file, &actual); if (err < 0) {
printk(KERN_ERR "Failed to get size of backing file \"%s\", " "err = %d\n", file, -err); return err;
}
if (actual != size) { /*__u64 can be a long on AMD64 and with %lu GCC complains; so
* the typecast.*/
printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header " "vs backing file\n", (unsignedlonglong) size, actual); return -EINVAL;
} if (modtime != mtime) {
printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs " "backing file\n", mtime, modtime); return -EINVAL;
} return 0;
}
staticint create_cow_file(char *cow_file, char *backing_file, struct openflags flags, int sectorsize, int alignment, int *bitmap_offset_out, unsignedlong *bitmap_len_out, int *data_offset_out)
{ int err, fd;
/*
 * Configure a ubd device from an option string (installed as the .config
 * hook of ubd_mc).
 *
 * @str:       option text; it is duplicated before being parsed, so the
 *             caller's buffer is not retained.
 * @error_out: receives a message describing the failure, if any.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or the
 * non-zero result of ubd_setup_common() / ubd_add().
 */
static int ubd_config(char *str, char **error_out)
{
	int n, ret;

	/* This string is possibly broken up and stored, so it's only
	 * freed if ubd_setup_common fails, or if only general options
	 * were set.
	 */
	str = kstrdup(str, GFP_KERNEL);
	if (str == NULL) {
		*error_out = "Failed to allocate memory";
		return -ENOMEM;
	}

	ret = ubd_setup_common(str, &n, error_out);
	if (ret)
		goto out_free;

	/* n == -1: a general option, so nothing kept the copy; ret is 0. */
	if (n == -1)
		goto out_free;

	mutex_lock(&ubd_lock);
	ret = ubd_add(n, error_out);
	if (ret) {
		/*
		 * NOTE(review): the slot is marked unused here but the
		 * duplicated string is not freed on this path - looks like
		 * a leak; confirm nothing else still points into it before
		 * adding a kfree().
		 */
		ubd_devs[n].file = NULL;
	}
	mutex_unlock(&ubd_lock);

	return ret;

out_free:
	kfree(str);
	return ret;
}
staticint ubd_get_config(char *name, char *str, int size, char **error_out)
{ struct ubd *ubd_dev; int n, len = 0;
n = parse_unit(&name); if((n >= MAX_DEV) || (n < 0)){
*error_out = "ubd_get_config : device number out of range"; return -1;
}
/* All these are called by mconsole in process context and without * ubd-specific locks. The structure itself is const except for .list.
*/ staticstruct mc_device ubd_mc = {
.list = LIST_HEAD_INIT(ubd_mc.list),
.name = "ubd",
.config = ubd_config,
.get_config = ubd_get_config,
.id = ubd_id,
.remove = ubd_remove,
};
/* This takes care of the case where we're exactly at the end of the * device, and *cow_offset + 1 is off the end. So, just back it up * by one word. Thanks to Lynn Kerby for the fix and James McMechan * for the original diagnosis.
*/ if (*cow_offset == (DIV_ROUND_UP(bitmap_len, sizeof(unsignedlong)) - 1))
(*cow_offset)--;
io_req = ubd_alloc_req(dev, req, segs); if (!io_req) return -ENOMEM;
io_req->desc_cnt = segs; if (segs)
ubd_map_req(dev, io_req, req);
ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); if (ret != sizeof(io_req)) { if (ret != -EAGAIN)
pr_err("write to io thread failed: %d\n", -ret);
kfree(io_req);
} return ret;
}
static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, conststruct blk_mq_queue_data *bd)
{ struct ubd *ubd_dev = hctx->queue->queuedata; struct request *req = bd->rq; int ret = 0, res = BLK_STS_OK;
blk_mq_start_request(req);
spin_lock_irq(&ubd_dev->lock);
switch (req_op(req)) { case REQ_OP_FLUSH: case REQ_OP_READ: case REQ_OP_WRITE: case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES:
ret = ubd_submit_request(ubd_dev, req); break; default:
WARN_ON_ONCE(1);
res = BLK_STS_NOTSUPP;
}
spin_unlock_irq(&ubd_dev->lock);
if (ret < 0) { if (ret == -ENOMEM)
res = BLK_STS_RESOURCE; else
res = BLK_STS_DEV_RESOURCE;
}
/*
 * Translate a positive errno value into a block-layer status code.
 *
 * @error_code: 0 for success or a positive errno; callers in this file
 *              pass the negated return value of the os_*() helpers.
 *
 * Returns BLK_STS_OK for 0, BLK_STS_NOTSUPP for ENOSYS/EOPNOTSUPP,
 * BLK_STS_NOSPC for ENOSPC and BLK_STS_IOERR for everything else.
 */
static int map_error(int error_code)
{
	switch (error_code) {
	case 0:
		return BLK_STS_OK;
	case ENOSYS:
	case EOPNOTSUPP:
		return BLK_STS_NOTSUPP;
	case ENOSPC:
		return BLK_STS_NOSPC;
	default:
		return BLK_STS_IOERR;
	}
}
/* * Everything from here onwards *IS NOT PART OF THE KERNEL* * * The following functions are part of UML hypervisor code. * All functions from here onwards are executed as a helper * thread and are not allowed to execute any kernel functions. * * Any communication must occur strictly via shared memory and IPC. * * Do not add printks, locks, kernel memory operations, etc - it * will result in unpredictable behaviour and/or crashes.
*/
staticint update_bitmap(struct io_thread_req *req, struct io_desc *segment)
{ int n;
if (segment->cow_offset == -1) return map_error(0);
n = os_pwrite_file(req->fds[1], &segment->bitmap_words, sizeof(segment->bitmap_words), segment->cow_offset); if (n != sizeof(segment->bitmap_words)) return map_error(-n);
/* FLUSH is really a special case, we cannot "case" it with others */
if (req_op(req->req) == REQ_OP_FLUSH) { /* fds[0] is always either the rw image or our cow file */
req->error = map_error(-os_sync_file(req->fds[0])); return;
}
nsectors = desc->length / req->sectorsize;
start = 0; do {
bit = ubd_test_bit(start, (unsignedchar *) &desc->sector_mask);
end = start; while((end < nsectors) &&
(ubd_test_bit(end, (unsignedchar *) &desc->sector_mask) == bit))
end++;
off = req->offset + req->offsets[bit] +
start * req->sectorsize;
len = (end - start) * req->sectorsize; if (desc->buffer != NULL)
buf = &desc->buffer[start * req->sectorsize];
switch (req_op(req->req)) { case REQ_OP_READ:
n = 0; do {
buf = &buf[n];
len -= n;
n = os_pread_file(req->fds[bit], buf, len, off); if (n < 0) {
req->error = map_error(-n); return;
}
} while((n < len) && (n != 0)); if (n < len) memset(&buf[n], 0, len - n); break; case REQ_OP_WRITE:
n = os_pwrite_file(req->fds[bit], buf, len, off); if(n != len){
req->error = map_error(-n); return;
} break; case REQ_OP_DISCARD:
n = os_falloc_punch(req->fds[bit], off, len); if (n) {
req->error = map_error(-n); return;
} break; case REQ_OP_WRITE_ZEROES:
n = os_falloc_zeroes(req->fds[bit], off, len); if (n) {
req->error = map_error(-n); return;
} break; default:
WARN_ON_ONCE(1);
req->error = BLK_STS_NOTSUPP; return;
}
/*
 * Descriptor the I/O helper thread reads request pointers from and writes
 * them back to (see io_thread); -1 until it has been assigned.
 *
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */ int kernel_fd = -1;
/*
 * Only changed by the io thread, which increments it once per request it
 * picks up. XXX: the value is not read anywhere in the visible code.
 */
static int io_count;
void *io_thread(void *arg)
{ int n, count, written, res;
os_fix_helper_thread_signals();
while(1){
n = bulk_req_safe_read(
kernel_fd,
io_req_buffer,
&io_remainder,
&io_remainder_size,
UBD_REQ_BUFFER_SIZE
); if (n <= 0) { if (n == -EAGAIN)
ubd_read_poll(-1);
continue;
}
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { struct io_thread_req *req = (*io_req_buffer)[count]; int i;
io_count++; for (i = 0; !req->error && i < req->desc_cnt; i++)
do_io(req, &(req->io_desc[i]));
}
written = 0;
do {
res = os_write_file(kernel_fd,
((char *) io_req_buffer) + written,
n - written); if (res >= 0) {
written += res;
} if (written < n) {
ubd_write_poll(-1);
}
} while (written < n);
}
return NULL;
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.15 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.