/*
 * Unique, monotonically increasing sequential number associated with block
 * device instances (i.e. incremented each time a device is attached).
 * Associating uevents with block devices in userspace is difficult and racy:
 * the uevent netlink socket is lossy, and on slow and overloaded systems has
 * a very high latency.
 * Block devices do not have exclusive owners in userspace, any process can set
 * one up (e.g. loop devices).  Moreover, device names can be reused (e.g.
 * loop0 can be reused again and again).
 * A userspace process setting up a block device and watching for its events
 * cannot thus reliably tell whether an event relates to the device it just set
 * up or another earlier instance with the same name.
 * This sequential number allows userspace processes to solve this problem, and
 * uniquely associate an uevent to the lifetime of a device.
 */
static atomic64_t diskseq;
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT		(1 << MINORBITS)
/* IDA allocator handing out minors in the extended (BLOCK_EXT_MAJOR) space */
static DEFINE_IDA(ext_devt_ida);
void set_capacity(struct gendisk *disk, sector_t sectors)
{ if (sectors > BLK_DEV_MAX_SECTORS) {
pr_warn_once("%s: truncate capacity from %lld to %lld\n",
disk->disk_name, sectors,
BLK_DEV_MAX_SECTORS);
sectors = BLK_DEV_MAX_SECTORS;
}
/*
 * Set disk capacity and notify if the size is not currently zero and will not
 * be set to zero.  Returns true if a uevent was sent, otherwise false.
 */
bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
{
	sector_t capacity = get_capacity(disk);
	char *envp[] = { "RESIZE=1", NULL };

	set_capacity(disk, size);

	/*
	 * Only print a message and send a uevent if the gendisk is user visible
	 * and alive.  This avoids spamming the log and udev when setting the
	 * initial capacity during probing.
	 */
	if (size == capacity ||
	    !disk_live(disk) ||
	    (disk->flags & GENHD_FL_HIDDEN))
		return false;

	pr_info("%s: detected capacity change from %lld to %lld\n",
		disk->disk_name, capacity, size);

	/*
	 * Historically we did not send a uevent for changes to/from an empty
	 * device.
	 */
	if (!capacity || !size)
		return false;
	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
	return true;
}
EXPORT_SYMBOL_GPL(set_capacity_and_notify);
staticvoid part_stat_read_all(struct block_device *part, struct disk_stats *stat)
{ int cpu;
/* * While iterating all CPUs, some IOs may be issued from a CPU already * traversed and complete on a CPU that has not yet been traversed, * causing the inflight number to be negative.
*/
inflight[READ] = read > 0 ? read : 0;
inflight[WRITE] = write > 0 ? write : 0;
}
/** * bdev_count_inflight - get the number of inflight IOs for a block device. * * @part: the block device. * * Inflight here means started IO accounting, from bdev_start_io_acct() for * bio-based block device, and from blk_account_io_start() for rq-based block * device.
*/ unsignedint bdev_count_inflight(struct block_device *part)
{ unsignedint inflight[2] = {0};
/* index in the above - for now: assume no multimajor ranges */
static inline int major_to_index(unsigned major)
{
	return major % BLKDEV_MAJOR_HASH_SIZE;
}
/** * __register_blkdev - register a new block device * * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If * @major = 0, try to allocate any unused major number. * @name: the name of the new block device as a zero terminated string * @probe: pre-devtmpfs / pre-udev callback used to create disks when their * pre-created device node is accessed. When a probe call uses * add_disk() and it fails the driver must cleanup resources. This * interface may soon be removed. * * The @name must be unique within the system. * * The return value depends on the @major input parameter: * * - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1] * then the function returns zero on success, or a negative error code * - if any unused major number was requested with @major = 0 parameter * then the return value is the allocated major number in range * [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise * * See Documentation/admin-guide/devices.txt for the list of allocated * major numbers. * * Use register_blkdev instead for any new code.
*/ int __register_blkdev(unsignedint major, constchar *name, void (*probe)(dev_t devt))
{ struct blk_major_name **n, *p; int index, ret = 0;
mutex_lock(&major_names_lock);
/* temporary */ if (major == 0) { for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { if (major_names[index] == NULL) break;
}
if (index == 0) {
printk("%s: failed to get major for %s\n",
__func__, name);
ret = -EBUSY; goto out;
}
major = index;
ret = major;
}
if (major >= BLKDEV_MAJOR_MAX) {
pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
__func__, major, BLKDEV_MAJOR_MAX-1, name);
ret = -EINVAL; goto out;
}
p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); if (p == NULL) {
ret = -ENOMEM; goto out;
}
int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
{
	struct file *file;
	int ret = 0;

	if (!disk_has_partscan(disk))
		return -EINVAL;
	if (disk->open_partitions)
		return -EBUSY;

	/*
	 * If the device is opened exclusively by current thread already, it's
	 * safe to scan partitions, otherwise, use bd_prepare_to_claim() to
	 * synchronize with other exclusive openers and other partition
	 * scanners.
	 */
	if (!(mode & BLK_OPEN_EXCL)) {
		ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions,
					  NULL);
		if (ret)
			return ret;
	}

	set_bit(GD_NEED_PART_SCAN, &disk->state);
	file = bdev_file_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL,
				     NULL, NULL);
	if (IS_ERR(file))
		ret = PTR_ERR(file);
	else
		fput(file);

	/*
	 * If bdev_file_open_by_dev() failed early, GD_NEED_PART_SCAN is still
	 * set, and this will cause that re-assembling a partitioned raid
	 * device will create partitions for the underlying disk.
	 */
	clear_bit(GD_NEED_PART_SCAN, &disk->state);
	if (!(mode & BLK_OPEN_EXCL))
		bd_abort_claiming(disk->part0, disk_scan_partitions);
	return ret;
}
if (!(disk->flags & GENHD_FL_HIDDEN)) { /* Make sure the first partition scan will be proceed */ if (get_capacity(disk) && disk_has_partscan(disk))
set_bit(GD_NEED_PART_SCAN, &disk->state);
bdev_add(disk->part0, ddev->devt); if (get_capacity(disk))
disk_scan_partitions(disk, BLK_OPEN_READ);
/* * Announce the disk and partitions after all partitions are * created. (for hidden disks uevents remain suppressed forever)
*/
dev_set_uevent_suppress(ddev, 0);
disk_uevent(disk, KOBJ_ADD);
}
{ struct device *ddev = disk_to_dev(disk); int ret;
if (WARN_ON_ONCE(bdev_nr_sectors(disk->part0) > BLK_DEV_MAX_SECTORS)) return -EINVAL;
if (queue_is_mq(disk->queue)) { /* * ->submit_bio and ->poll_bio are bypassed for blk-mq drivers.
*/ if (disk->fops->submit_bio || disk->fops->poll_bio) return -EINVAL;
} else { if (!disk->fops->submit_bio) return -EINVAL;
bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO);
}
/* * If the driver provides an explicit major number it also must provide * the number of minors numbers supported, and those will be used to * setup the gendisk. * Otherwise just allocate the device numbers for both the whole device * and all partitions from the extended dev_t space.
*/
ret = -EINVAL; if (disk->major) { if (WARN_ON(!disk->minors)) goto out;
if (disk->minors > DISK_MAX_PARTS) {
pr_err("block: can't allocate more than %d partitions\n",
DISK_MAX_PARTS);
disk->minors = DISK_MAX_PARTS;
} if (disk->first_minor > MINORMASK ||
disk->minors > MINORMASK + 1 ||
disk->first_minor + disk->minors > MINORMASK + 1) goto out;
} else { if (WARN_ON(disk->minors)) goto out;
ret = blk_alloc_ext_minor(); if (ret < 0) goto out;
disk->major = BLOCK_EXT_MAJOR;
disk->first_minor = ret;
}
/* delay uevents, until we scanned partition table */
dev_set_uevent_suppress(ddev, 1);
ddev->parent = parent;
ddev->groups = groups;
dev_set_name(ddev, "%s", disk->disk_name); if (fwnode)
device_set_node(ddev, fwnode); if (!(disk->flags & GENHD_FL_HIDDEN))
ddev->devt = MKDEV(disk->major, disk->first_minor);
ret = device_add(ddev); if (ret) goto out_free_ext_minor;
ret = disk_alloc_events(disk); if (ret) goto out_device_del;
ret = sysfs_create_link(block_depr, &ddev->kobj,
kobject_name(&ddev->kobj)); if (ret) goto out_device_del;
/* * avoid probable deadlock caused by allocating memory with * GFP_KERNEL in runtime_resume callback of its all ancestor * devices
*/
pm_runtime_set_memalloc_noio(ddev, true);
disk->part0->bd_holder_dir =
kobject_create_and_add("holders", &ddev->kobj); if (!disk->part0->bd_holder_dir) {
ret = -ENOMEM; goto out_del_block_link;
}
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (!disk->slave_dir) {
ret = -ENOMEM; goto out_put_holder_dir;
}
ret = blk_register_queue(disk); if (ret) goto out_put_slave_dir;
if (!(disk->flags & GENHD_FL_HIDDEN)) {
ret = bdi_register(disk->bdi, "%u:%u",
disk->major, disk->first_minor); if (ret) goto out_unregister_queue;
bdi_set_owner(disk->bdi, ddev);
ret = sysfs_create_link(&ddev->kobj,
&disk->bdi->dev->kobj, "bdi"); if (ret) goto out_unregister_bdi;
} else { /* * Even if the block_device for a hidden gendisk is not * registered, it needs to have a valid bd_dev so that the * freeing of the dynamic major works.
*/
disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
} return 0;
/**
 * add_disk_fwnode - add disk information to kernel list with fwnode
 * @parent: parent device for the disk
 * @disk: per-device partitioning information
 * @groups: Additional per-device sysfs groups
 * @fwnode: attached disk fwnode
 *
 * This function registers the partitioning information in @disk
 * with the kernel. Also attach a fwnode to the disk device.
 */
int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
				 const struct attribute_group **groups,
				 struct fwnode_handle *fwnode)
{
	struct blk_mq_tag_set *set;
	unsigned int memflags;
	int ret;

	if (queue_is_mq(disk->queue)) {
		set = disk->queue->tag_set;
		memflags = memalloc_noio_save();
		down_read(&set->update_nr_hwq_lock);
		ret = __add_disk(parent, disk, groups, fwnode);
		up_read(&set->update_nr_hwq_lock);
		memalloc_noio_restore(memflags);
	} else {
		ret = __add_disk(parent, disk, groups, fwnode);
	}

	/*
	 * add_disk_final() needn't read `nr_hw_queues`, so move it out
	 * of read lock `set->update_nr_hwq_lock` for avoiding unnecessary
	 * lock dependency on `disk->open_mutex` from scanning partition.
	 */
	if (!ret)
		add_disk_final(disk);
	return ret;
}
EXPORT_SYMBOL_GPL(add_disk_fwnode);
/**
 * device_add_disk - add disk information to kernel list
 * @parent: parent device for the disk
 * @disk: per-device partitioning information
 * @groups: Additional per-device sysfs groups
 *
 * This function registers the partitioning information in @disk
 * with the kernel.
 */
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
				 const struct attribute_group **groups)
{
	/* plain add_disk with no firmware node attached */
	return add_disk_fwnode(parent, disk, groups, NULL);
}
EXPORT_SYMBOL(device_add_disk);
/* * On surprise disk removal, bdev_mark_dead() may call into file * systems below. Make it clear that we're expecting to not hold * disk->open_mutex.
*/
lockdep_assert_not_held(&disk->open_mutex);
rcu_read_lock();
xa_for_each(&disk->part_tbl, idx, bdev) { if (!kobject_get_unless_zero(&bdev->bd_device.kobj)) continue;
rcu_read_unlock();
staticbool __blk_mark_disk_dead(struct gendisk *disk)
{ /* * Fail any new I/O.
*/ if (test_and_set_bit(GD_DEAD, &disk->state)) returnfalse;
if (test_bit(GD_OWNS_QUEUE, &disk->state))
blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue);
/* * Stop buffered writers from dirtying pages that can't be written out.
*/
set_capacity(disk, 0);
/* * Prevent new I/O from crossing bio_queue_enter().
*/ return blk_queue_start_drain(disk->queue);
}
/**
 * blk_mark_disk_dead - mark a disk as dead
 * @disk: disk to mark as dead
 *
 * Mark a disk as dead (e.g. surprise removed) and don't accept any new I/O
 * to this disk.
 */
void blk_mark_disk_dead(struct gendisk *disk)
{
	__blk_mark_disk_dead(disk);
	blk_report_disk_dead(disk, true);
}
EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN))) return;
disk_del_events(disk);
/* * Prevent new openers by unlinked the bdev inode.
*/
mutex_lock(&disk->open_mutex);
xa_for_each(&disk->part_tbl, idx, part)
bdev_unhash(part);
mutex_unlock(&disk->open_mutex);
/* * Tell the file system to write back all dirty data and shut down if * it hasn't been notified earlier.
*/ if (!test_bit(GD_DEAD, &disk->state))
blk_report_disk_dead(disk, false);
/* * Drop all partitions now that the disk is marked dead.
*/
mutex_lock(&disk->open_mutex);
start_drain = __blk_mark_disk_dead(disk); if (start_drain)
blk_freeze_acquire_lock(q);
xa_for_each_start(&disk->part_tbl, idx, part, 1)
drop_partition(part);
mutex_unlock(&disk->open_mutex);
if (!(disk->flags & GENHD_FL_HIDDEN)) {
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
/* * Unregister bdi before releasing device numbers (as they can * get reused and we'd get clashes in sysfs).
*/
bdi_unregister(disk->bdi);
}
/* * If the disk does not own the queue, allow using passthrough requests * again. Else leave the queue frozen to fail all I/O.
*/ if (!test_bit(GD_OWNS_QUEUE, &disk->state))
__blk_mq_unfreeze_queue(q, true); elseif (queue_is_mq(q))
blk_mq_exit_queue(q);
/** * del_gendisk - remove the gendisk * @disk: the struct gendisk to remove * * Removes the gendisk and all its associated resources. This deletes the * partitions associated with the gendisk, and unregisters the associated * request_queue. * * This is the counter to the respective __device_add_disk() call. * * The final removal of the struct gendisk happens when its refcount reaches 0 * with put_disk(), which should be called after del_gendisk(), if * __device_add_disk() was used. * * Drivers exist which depend on the release of the gendisk to be synchronous, * it should not be deferred. * * Context: can sleep
*/ void del_gendisk(struct gendisk *disk)
{ struct blk_mq_tag_set *set; unsignedint memflags;
if (!queue_is_mq(disk->queue)) {
__del_gendisk(disk);
} else {
set = disk->queue->tag_set;
/** * invalidate_disk - invalidate the disk * @disk: the struct gendisk to invalidate * * A helper to invalidates the disk. It will clean the disk's associated * buffer/page caches and reset its internal states so that the disk * can be reused by the drivers. * * Context: can sleep
*/ void invalidate_disk(struct gendisk *disk)
{ struct block_device *bdev = disk->part0;
iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return ERR_PTR(-ENOMEM);
seqf->private = iter;
class_dev_iter_init(iter, &block_class, NULL, &disk_type); do {
dev = class_dev_iter_next(iter); if (!dev) return NULL;
} while (skip--);
/* * Show the number of IOs issued to driver. * For bio-based device, started from bdev_start_io_acct(); * For rq-based device, started from blk_mq_start_request();
*/
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf)
{ struct block_device *bdev = dev_to_bdev(dev); struct request_queue *q = bdev_get_queue(bdev); unsignedint inflight[2] = {0};
/** * disk_release - releases all allocated resources of the gendisk * @dev: the device representing this disk * * This function releases all allocated resources of the gendisk. * * Drivers which used __device_add_disk() have a gendisk with a request_queue * assigned. Since the request_queue sits on top of the gendisk for these * drivers we also call blk_put_queue() for them, and we expect the * request_queue refcount to reach 0 at this point, and so the request_queue * will also be freed prior to the disk. * * Context: can sleep
*/ staticvoid disk_release(struct device *dev)
{ struct gendisk *disk = dev_to_disk(dev);
might_sleep();
WARN_ON_ONCE(disk_live(disk));
blk_trace_remove(disk->queue);
/* * To undo the all initialization from blk_mq_init_allocated_queue in * case of a probe failure where add_disk is never called we have to * call blk_mq_exit_queue here. We can't do this for the more common * teardown case (yet) as the tagset can be gone by the time the disk * is released once it was added.
*/ if (queue_is_mq(disk->queue) &&
test_bit(GD_OWNS_QUEUE, &disk->state) &&
!test_bit(GD_ADDED, &disk->state))
blk_mq_exit_queue(disk->queue);
#ifdef CONFIG_PROC_FS /* * aggregate disk stat collector. Uses the same stats that the sysfs * entries do, above, but makes them available through one seq_file. * * The output looks suspiciously like /proc/partitions with a bunch of * extra fields.
*/ staticint diskstats_show(struct seq_file *seqf, void *v)
{ struct gendisk *gp = v; struct block_device *hd; unsignedint inflight; struct disk_stats stat; unsignedlong idx;
/* if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) seq_puts(seqf, "major minor name" " rio rmerge rsect ruse wio wmerge " "wsect wuse running use aveq" "\n\n");
*/
rcu_read_lock();
xa_for_each(&gp->part_tbl, idx, hd) { if (bdev_is_partition(hd) && !bdev_nr_sectors(hd)) continue;
disk = __alloc_disk_node(q, node, lkclass); if (!disk) {
blk_put_queue(q); return ERR_PTR(-ENOMEM);
}
set_bit(GD_OWNS_QUEUE, &disk->state); return disk;
}
EXPORT_SYMBOL(__blk_alloc_disk);
/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for
 *
 * This decrements the refcount for the struct gendisk. When this reaches 0
 * we'll have disk_release() called.
 *
 * Note: for blk-mq disk put_disk must be called before freeing the tag_set
 * when handling probe errors (that is before add_disk() is called).
 *
 * Context: Any context, but the last reference must not be dropped from
 * atomic context.
 */
void put_disk(struct gendisk *disk)
{
	/* NULL-tolerant: disk_to_dev() on NULL would be invalid, so guard */
	if (disk)
		put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
if (!ro)
event[8] = '0';
kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
}
/**
 * set_disk_ro - set a gendisk read-only
 * @disk: gendisk to operate on
 * @read_only: %true to set the disk read-only, %false set the disk read/write
 *
 * This function is used to indicate whether a given disk device should have
 * its read-only flag set. set_disk_ro() is typically used by device drivers
 * to indicate whether the underlying physical device is write-protected.
 */
void set_disk_ro(struct gendisk *disk, bool read_only)
{
	/* only emit a uevent when the state actually changes */
	if (read_only) {
		if (test_and_set_bit(GD_READ_ONLY, &disk->state))
			return;
	} else {
		if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
			return;
	}
	set_disk_ro_uevent(disk, read_only);
}
EXPORT_SYMBOL(set_disk_ro);
/*
 * NOTE(review): the following German text is a disclaimer from the website
 * this file was scraped from ("the information on this web page was compiled
 * to the best of our knowledge; no guarantee of completeness, correctness or
 * quality is given; the colored syntax display is still experimental").
 * It is not part of the kernel source.  Wrapped in a comment so it does not
 * break compilation; retained verbatim below:
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder
 * Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten
 * Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung ist noch experimentell.
 */