/* * Compressed RAM block device * * Copyright (C) 2008, 2009, 2010 Nitin Gupta * 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the licence that better fits your requirements. * * Released under the terms of 3-clause BSD License * Released under the terms of GNU General Public License Version 2.0 *
*/
/* Module params (documentation at end) */
static unsigned int num_devices = 1;

/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;
/*
 * A post-processing bucket is, essentially, a size class, this defines
 * the range (in bytes) of pp-slots sizes in particular bucket.
 */
#define PP_BUCKET_SIZE_RANGE	64
#define NUM_PP_BUCKETS		((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)
/* The higher the bucket id the more optimal slot post-processing is */ while (idx >= 0) {
pps = list_first_entry_or_null(&ctl->pp_buckets[idx], struct zram_pp_slot,
entry); if (pps) break;
err = kstrtoul(buf, 10, &val); if (err || val != 0) return -EINVAL;
down_read(&zram->init_lock); if (init_done(zram)) {
atomic_long_set(&zram->stats.max_used_pages,
zs_get_total_pages(zram->mem_pool));
}
up_read(&zram->init_lock);
return len;
}
/* * Mark all pages which are older than or equal to cutoff as IDLE. * Callers should hold the zram init lock in read mode
*/ staticvoid mark_idle(struct zram *zram, ktime_t cutoff)
{ int is_idle = 1; unsignedlong nr_pages = zram->disksize >> PAGE_SHIFT; int index;
for (index = 0; index < nr_pages; index++) { /* * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no * post-processing (recompress, writeback) happens to the * ZRAM_SAME slot. * * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
*/
zram_slot_lock(zram, index); if (!zram_allocated(zram, index) ||
zram_test_flag(zram, index, ZRAM_WB) ||
zram_test_flag(zram, index, ZRAM_SAME)) {
zram_slot_unlock(zram, index); continue;
}
if (!sysfs_streq(buf, "all")) { /* * If it did not parse as 'all' try to treat it as an integer * when we have memory tracking enabled.
*/
u64 age_sec;
page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM;
while ((pps = select_pp_slot(ctl))) {
spin_lock(&zram->wb_limit_lock); if (zram->wb_limit_enable && !zram->bd_wb_limit) {
spin_unlock(&zram->wb_limit_lock);
ret = -EIO; break;
}
spin_unlock(&zram->wb_limit_lock);
if (!blk_idx) {
blk_idx = alloc_block_bdev(zram); if (!blk_idx) {
ret = -ENOSPC; break;
}
}
index = pps->index;
zram_slot_lock(zram, index); /* * scan_slots() sets ZRAM_PP_SLOT and relases slot lock, so * slots can change in the meantime. If slots are accessed or * freed they lose ZRAM_PP_SLOT flag and hence we don't * post-process them.
*/ if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) goto next; if (zram_read_from_zspool(zram, page, index)) goto next;
zram_slot_unlock(zram, index);
/* * XXX: A single page IO would be inefficient for write * but it would be not bad as starter.
*/
err = submit_bio_wait(&bio); if (err) {
release_pp_slot(zram, pps); /* * BIO errors are not fatal, we continue and simply * attempt to writeback the remaining objects (pages). * At the same time we need to signal user-space that * some writes (at least one, but also could be all of * them) were not successful and we do so by returning * the most recent BIO error.
*/
ret = err; continue;
}
atomic64_inc(&zram->stats.bd_writes);
zram_slot_lock(zram, index); /* * Same as above, we release slot lock during writeback so * slot can change under us: slot_free() or slot_free() and * reallocation (zram_write_page()). In both cases slot loses * ZRAM_PP_SLOT flag. No concurrent post-processing can set * ZRAM_PP_SLOT on such slots until current post-processing * finishes.
*/ if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) goto next;
/* * Workaround to support the old writeback interface. * * The old writeback interface has a minor inconsistency and * requires key=value only for page_index parameter, while the * writeback mode is a valueless parameter. * * This is not the case anymore and now all parameters are * required to have values, however, we need to support the * legacy writeback interface format so we check if we can * recognize a valueless parameter as the (legacy) writeback * mode.
*/ if (!val || !*val) {
err = parse_mode(param, &mode); if (err) {
ret = err; goto release_init_lock;
}
/* * Block layer want one ->submit_bio to be active at a time, so if we use * chained IO with parent IO in same context, it's a deadlock. To avoid that, * use a worker thread context.
*/ staticint read_from_bdev_sync(struct zram *zram, struct page *page, unsignedlong entry)
{ struct zram_work work;
/* * No memory is allocated for same element filled pages. * Simply clear same page flag.
*/ if (zram_test_flag(zram, index, ZRAM_SAME)) {
zram_clear_flag(zram, index, ZRAM_SAME);
atomic64_dec(&zram->stats.same_pages); goto out;
}
handle = zram_get_handle(zram, index); if (!handle) return;
/*
 * Read the data for slot @index into @page, either decompressing it from
 * the zsmalloc pool or, for ZRAM_WB slots, reading it back from the
 * backing device. @parent is passed through to read_from_bdev() for
 * backing-device reads (may be NULL for synchronous callers).
 *
 * Returns 0 on success or a negative error code.
 */
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	zram_slot_lock(zram, index);
	if (!zram_test_flag(zram, index, ZRAM_WB)) {
		/* Slot should be locked throughout the function call */
		ret = zram_read_from_zspool(zram, page, index);
		zram_slot_unlock(zram, index);
	} else {
		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		zram_slot_unlock(zram, index);

		ret = read_from_bdev(zram, page, zram_get_handle(zram, index),
				     parent);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
/*
 * Use a temporary buffer to decompress the page, as the decompressor
 * always expects a full page for the output.
 *
 * Returns 0 on success, -ENOMEM if the bounce page cannot be allocated,
 * or the error propagated from zram_read_page().
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}
/*
 * Read the segment described by @bvec: partial (sub-page) reads go through
 * a bounce page, full-page reads decompress directly into the bvec's page.
 */
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_read_partial(zram, bvec, index, offset);
	return zram_read_page(zram, bvec->bv_page, index, bio);
}
/* * This function is called from preemptible context so we don't need * to do optimistic and fallback to pessimistic handle allocation, * like we do for compressible pages.
*/
handle = zs_malloc(zram->mem_pool, PAGE_SIZE,
GFP_NOIO | __GFP_NOWARN |
__GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); if (IS_ERR_VALUE(handle)) return PTR_ERR((void *)handle);
if (!zram_can_store_page(zram)) {
zs_free(zram->mem_pool, handle); return -ENOMEM;
}
/*
 * This is a partial IO. Read the full page before writing the changes.
 *
 * Returns 0 on success, -ENOMEM if the bounce page cannot be allocated,
 * or the error propagated from zram_read_page()/zram_write_page().
 */
static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
				   u32 index, int offset, struct bio *bio)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = zram_read_page(zram, page, index, bio);
	if (!ret) {
		/* Apply the sub-page update, then re-compress the whole page */
		memcpy_from_bvec(page_address(page) + offset, bvec);
		ret = zram_write_page(zram, page, index);
	}
	__free_page(page);
	return ret;
}
/*
 * Write the segment described by @bvec: partial (sub-page) writes do a
 * read-modify-write through a bounce page, full-page writes compress the
 * bvec's page directly.
 */
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
			   u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_write_partial(zram, bvec, index, offset, bio);
	return zram_write_page(zram, bvec->bv_page, index);
}
/* Already compressed with same of higher priority */ if (zram_get_priority(zram, index) + 1 >= prio_max) goto next;
ok = place_pp_slot(zram, ctl, index);
next:
zram_slot_unlock(zram, index); if (!ok) break;
}
return 0;
}
/* * This function will decompress (unless it's ZRAM_HUGE) the page and then * attempt to compress it using provided compression algorithm priority * (which is potentially more effective). * * Corresponding ZRAM slot should be locked.
*/ staticint recompress_slot(struct zram *zram, u32 index, struct page *page,
u64 *num_recomp_pages, u32 threshold, u32 prio,
u32 prio_max)
{ struct zcomp_strm *zstrm = NULL; unsignedlong handle_old; unsignedlong handle_new; unsignedint comp_len_old; unsignedint comp_len_new; unsignedint class_index_old; unsignedint class_index_new; void *src; int ret = 0;
handle_old = zram_get_handle(zram, index); if (!handle_old) return -EINVAL;
comp_len_old = zram_get_obj_size(zram, index); /* * Do not recompress objects that are already "small enough".
*/ if (comp_len_old < threshold) return 0;
ret = zram_read_from_zspool(zram, page, index); if (ret) return ret;
/* * We touched this entry so mark it as non-IDLE. This makes sure that * we don't preserve IDLE flag and don't incorrectly pick this entry * for different post-processing type (e.g. writeback).
*/
zram_clear_flag(zram, index, ZRAM_IDLE);
prio = max(prio, zram_get_priority(zram, index) + 1); /* * Recompression slots scan should not select slots that are * already compressed with a higher priority algorithm, but * just in case
*/ if (prio >= prio_max) return 0;
/* * Iterate the secondary comp algorithms list (in order of priority) * and try to recompress the page.
*/ for (; prio < prio_max; prio++) { if (!zram->comps[prio]) continue;
/* Continue until we make progress */ if (class_index_new >= class_index_old ||
(threshold && comp_len_new >= threshold)) {
zcomp_stream_put(zstrm);
zstrm = NULL; continue;
}
/* Recompression was successful so break out */ break;
}
/* * Decrement the limit (if set) on pages we can recompress, even * when current recompression was unsuccessful or did not compress * the page below the threshold, because we still spent resources * on it.
*/ if (*num_recomp_pages)
*num_recomp_pages -= 1;
/* Compression error */ if (ret) return ret;
if (!zstrm) { /* * Secondary algorithms failed to re-compress the page * in a way that would save memory. * * Mark the object incompressible if the max-priority * algorithm couldn't re-compress it.
*/ if (prio < zram->num_active_comps) return 0;
zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); return 0;
}
/* * We are holding per-CPU stream mutex and entry lock so better * avoid direct reclaim. Allocation error is not fatal since * we still have the old object in the mem_pool. * * XXX: technically, the node we really want here is the node that holds * the original compressed data. But that would require us to modify * zsmalloc API to return this information. For now, we will make do with * the node of the page allocated for recompression.
*/
handle_new = zs_malloc(zram->mem_pool, comp_len_new,
GFP_NOIO | __GFP_NOWARN |
__GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); if (IS_ERR_VALUE(handle_new)) {
zcomp_stream_put(zstrm); return PTR_ERR((void *)handle_new);
}
if (!strcmp(param, "type")) { if (!strcmp(val, "idle"))
mode = RECOMPRESS_IDLE; if (!strcmp(val, "huge"))
mode = RECOMPRESS_HUGE; if (!strcmp(val, "huge_idle"))
mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE; continue;
}
if (!strcmp(param, "max_pages")) { /* * Limit the number of entries (pages) we attempt to * recompress.
*/
ret = kstrtoull(val, 10, &num_recomp_pages); if (ret) return ret; continue;
}
if (!strcmp(param, "threshold")) { /* * We will re-compress only idle objects equal or * greater in size than watermark.
*/
ret = kstrtouint(val, 10, &threshold); if (ret) return ret; continue;
}
if (!strcmp(param, "algo")) {
algo = val; continue;
}
if (!strcmp(param, "priority")) {
ret = kstrtouint(val, 10, &prio); if (ret) return ret;
if (prio == ZRAM_PRIMARY_COMP)
prio = ZRAM_SECONDARY_COMP;
prio_max = prio + 1; continue;
}
}
if (threshold >= huge_class_size) return -EINVAL;
down_read(&zram->init_lock); if (!init_done(zram)) {
ret = -EINVAL; goto release_init_lock;
}
/* Do not permit concurrent post-processing actions. */ if (atomic_xchg(&zram->pp_in_progress, 1)) {
up_read(&zram->init_lock); return -EAGAIN;
}
if (algo) { bool found = false;
for (; prio < ZRAM_MAX_COMPS; prio++) { if (!zram->comp_algs[prio]) continue;
if (!strcmp(zram->comp_algs[prio], algo)) {
prio_max = prio + 1;
found = true; break;
}
}
if (!found) {
ret = -EINVAL; goto release_init_lock;
}
}
prio_max = min(prio_max, (u32)zram->num_active_comps); if (prio >= prio_max) {
ret = -EINVAL; goto release_init_lock;
}
page = alloc_page(GFP_KERNEL); if (!page) {
ret = -ENOMEM; goto release_init_lock;
}
ctl = init_pp_ctl(); if (!ctl) {
ret = -ENOMEM; goto release_init_lock;
}
/*
 * Free every fully-covered page in the discarded range and complete the
 * bio. Partially-covered pages at either end are skipped (see below).
 */
static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;

	/*
	 * zram manages data in physical block size units. Because logical block
	 * size isn't identical with physical block size on some arch, we
	 * could get a discard request pointing to a specific offset within a
	 * certain physical block. Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory. So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		/*
		 * NOTE(review): this early return does not call bio_endio() —
		 * confirm the bio is completed elsewhere on this path.
		 */
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}

	bio_endio(bio);
}
staticvoid zram_bio_read(struct zram *zram, struct bio *bio)
{ unsignedlong start_time = bio_start_io_acct(bio); struct bvec_iter iter = bio->bi_iter;
/*
 * Handler function for all zram I/O requests.
 */
static void zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_bdev->bd_disk->private_data;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		zram_bio_read(zram, bio);
		break;
	case REQ_OP_WRITE:
		zram_bio_write(zram, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, bio);
		break;
	default:
		/* Unsupported op: warn once and complete the bio */
		WARN_ON_ONCE(1);
		bio_endio(bio);
	}
}
/* I/O operation under all of CPU are done so let's free */
zram_meta_free(zram, zram->disksize);
zram->disksize = 0;
zram_destroy_comps(zram);
memset(&zram->stats, 0, sizeof(zram->stats));
atomic_set(&zram->pp_in_progress, 0);
reset_bdev(zram);
ret = kstrtou16(buf, 10, &do_reset); if (ret) return ret;
if (!do_reset) return -EINVAL;
zram = dev_to_zram(dev);
disk = zram->disk;
mutex_lock(&disk->open_mutex); /* Do not reset an active device or claimed device */ if (disk_openers(disk) || zram->claim) {
mutex_unlock(&disk->open_mutex); return -EBUSY;
}
/* From now on, anyone can't open /dev/zram[0-9] */
zram->claim = true;
mutex_unlock(&disk->open_mutex);
/* Make sure all the pending I/O are finished */
sync_blockdev(disk->part0);
zram_reset_device(zram);
/* * Allocate and initialize new zram device. the function returns * '>= 0' device_id upon success, and negative value otherwise.
*/ staticint zram_add(void)
{ struct queue_limits lim = {
.logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, /* * To ensure that we always get PAGE_SIZE aligned and * n*PAGE_SIZED sized I/O requests.
*/
.physical_block_size = PAGE_SIZE,
.io_min = PAGE_SIZE,
.io_opt = PAGE_SIZE,
.max_hw_discard_sectors = UINT_MAX, /* * zram_bio_discard() will clear all logical blocks if logical * block size is identical with physical block size(PAGE_SIZE). * But if it is different, we will skip discarding some parts of * logical blocks in the part of the request range which isn't * aligned to physical block size. So we can't ensure that all * discarded logical blocks are zeroed.
*/ #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
.max_write_zeroes_sectors = UINT_MAX, #endif
.features = BLK_FEAT_STABLE_WRITES |
BLK_FEAT_SYNCHRONOUS,
}; struct zram *zram; int ret, device_id;
zram = kzalloc(sizeof(struct zram), GFP_KERNEL); if (!zram) return -ENOMEM;
ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); if (ret < 0) goto out_free_dev;
device_id = ret;
/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */
set_capacity(zram->disk, 0);
ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) goto out_cleanup_disk;
mutex_lock(&zram->disk->open_mutex); if (disk_openers(zram->disk)) {
mutex_unlock(&zram->disk->open_mutex); return -EBUSY;
}
claimed = zram->claim; if (!claimed)
zram->claim = true;
mutex_unlock(&zram->disk->open_mutex);
zram_debugfs_unregister(zram);
if (claimed) { /* * If we were claimed by reset_store(), del_gendisk() will * wait until reset_store() is done, so nothing need to do.
*/
;
} else { /* Make sure all the pending I/O are finished */
sync_blockdev(zram->disk->part0);
zram_reset_device(zram);
}
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.77Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.