bool btrfs_compress_is_valid_type(constchar *str, size_t len)
{ int i;
for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) {
size_t comp_len = strlen(btrfs_compress_types[i]);
if (len < comp_len) continue;
if (!strncmp(btrfs_compress_types[i], str, comp_len)) returntrue;
} returnfalse;
}
/*
 * Dispatch folio compression to the implementation selected by @type.
 *
 * @ws:         per-type workspace obtained from get_workspace()
 * @mapping:    address space the source data lives in
 * @start:      file offset of the range to compress
 * @folios:     output array of compressed folios (allocated on demand)
 * @out_folios: in: capacity of @folios, out: number actually used
 * @total_in:   out: number of uncompressed bytes consumed
 * @total_out:  in: input length, out: total compressed bytes
 *
 * Returns 0 on success or a negative errno; -E2BIG means "do not compress".
 */
static int compression_compress_pages(int type, struct list_head *ws,
				      struct address_space *mapping, u64 start,
				      struct folio **folios,
				      unsigned long *out_folios,
				      unsigned long *total_in,
				      unsigned long *total_out)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
		return zlib_compress_folios(ws, mapping, start, folios,
					    out_folios, total_in, total_out);
	case BTRFS_COMPRESS_LZO:
		return lzo_compress_folios(ws, mapping, start, folios,
					   out_folios, total_in, total_out);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_compress_folios(ws, mapping, start, folios,
					    out_folios, total_in, total_out);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can happen when compression races with remount setting
		 * it to 'no compress', while caller doesn't call
		 * inode_need_compress() to check if we really need to
		 * compress.
		 *
		 * Not a big deal, just need to inform caller that we
		 * haven't allocated any pages yet.
		 */
		*out_folios = 0;
		return -E2BIG;
	}
}
/*
 * Dispatch whole-bio decompression to the implementation selected by the
 * compressed bio's recorded compress_type.
 */
static int compression_decompress_bio(struct list_head *ws,
				      struct compressed_bio *cb)
{
	switch (cb->compress_type) {
	case BTRFS_COMPRESS_ZLIB:
		return zlib_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_LZO:
		return lzo_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}
/*
 * Dispatch single-buffer decompression to the implementation selected by
 * @type, writing at most @destlen bytes into @dest_folio at @dest_pgoff.
 */
static int compression_decompress(int type, struct list_head *ws,
				  const u8 *data_in, struct folio *dest_folio,
				  unsigned long dest_pgoff, size_t srclen,
				  size_t destlen)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
		return zlib_decompress(ws, data_in, dest_folio,
				       dest_pgoff, srclen, destlen);
	case BTRFS_COMPRESS_LZO:
		return lzo_decompress(ws, data_in, dest_folio,
				      dest_pgoff, srclen, destlen);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_decompress(ws, data_in, dest_folio,
				       dest_pgoff, srclen, destlen);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}
/*
 * Release every compressed folio attached to @cb, then free the folio
 * pointer array itself.
 */
static void btrfs_free_compressed_folios(struct compressed_bio *cb)
{
	for (unsigned int i = 0; i < cb->nr_folios; i++)
		btrfs_free_compr_folio(cb->compressed_folios[i]);
	kfree(cb->compressed_folios);
}
/* * Global cache of last unused pages for compression/decompression.
*/ staticstruct btrfs_compr_pool { struct shrinker *shrinker;
spinlock_t lock; struct list_head list; int count; int thresh;
} compr_pool;
/*
 * Shrinker "count objects" callback for the global compressed-page pool:
 * computes how far the cached page count exceeds the retention threshold.
 *
 * NOTE(review): this function is truncated in this chunk — the return
 * statement and closing brace are missing.  The run-together keywords
 * ("staticunsignedlong") are extraction damage, left byte-identical here.
 */
staticunsignedlong btrfs_compr_pool_count(struct shrinker *sh, struct shrink_control *sc)
{ int ret;
/* * We must not read the values more than once if 'ret' gets expanded in * the return statement so we don't accidentally return a negative * number, even if the first condition finds it positive.
 */
ret = READ_ONCE(compr_pool.count) - READ_ONCE(compr_pool.thresh);
/*
 * NOTE(review): this span splices two different functions together.  The
 * body of end_bbio_compressed_write() is cut off right after its local
 * declarations, and the while-loop below — which uses 'offset' and 'bio',
 * neither declared here — belongs to a follow-up bio-building helper whose
 * header is missing from this chunk.  Kept byte-identical pending repair.
 */
/* * Do the cleanup once all the compressed pages hit the disk. This will clear * writeback on the file pages and free the compressed pages. * * This also calls the writeback end hooks for the file pages so that metadata * and checksums can be updated in the file.
 */ staticvoid end_bbio_compressed_write(struct btrfs_bio *bbio)
{ struct compressed_bio *cb = to_compressed_bio(bbio); struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
while (offset < cb->compressed_len) { int ret;
u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE);
/* Maximum compressed extent is smaller than bio size limit. */
ret = bio_add_folio(bio, cb->compressed_folios[offset >> PAGE_SHIFT],
len, 0);
ASSERT(ret);
offset += len;
}
}
/*
 * NOTE(review): truncated — only the opening of
 * btrfs_submit_compressed_write() (comment, signature and first locals) is
 * present in this chunk; the compressed_bio setup and submission are
 * missing.  Kept byte-identical.
 */
/* * worker function to build and submit bios for previously compressed pages. * The corresponding pages in the inode should be marked for writeback * and the compressed pages should have a reference on them for dropping * when the IO is complete. * * This also checksums the file bytes and gets things ready for * the end io hooks.
 */ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered, struct folio **compressed_folios, unsignedint nr_folios,
blk_opf_t write_flags, bool writeback)
{ struct btrfs_inode *inode = ordered->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct compressed_bio *cb;
/*
 * NOTE(review): truncated — the main readahead loop's opening (the
 * 'while (cur < compressed_end)' header, the extent-map lookup, and the
 * declarations of folio_sz/offset/pg_index/page_end/add_size used below)
 * is missing from this chunk, so the fragment does not compile as-is.
 * Kept byte-identical pending repair against the full source.
 */
/* * Add extra pages in the same compressed file extent so that we don't need to * re-read the same extent again and again. * * NOTE: this won't work well for subpage, as for subpage read, we lock the * full page then submit bio for each compressed/regular extents. * * This means, if we have several sectors in the same page points to the same * on-disk compressed data, we will re-read the same extent many times and * this function can only help for the next page.
 */ static noinline int add_ra_bio_pages(struct inode *inode,
u64 compressed_end, struct compressed_bio *cb, int *memstall, unsignedlong *pflags)
{ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
pgoff_t end_index; struct bio *orig_bio = &cb->orig_bbio->bio;
u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
u64 isize = i_size_read(inode); int ret; struct folio *folio; struct extent_map *em; struct address_space *mapping = inode->i_mapping; struct extent_map_tree *em_tree; struct extent_io_tree *tree; int sectors_missed = 0;
em_tree = &BTRFS_I(inode)->extent_tree;
tree = &BTRFS_I(inode)->io_tree;
if (isize == 0) return 0;
/* * For current subpage support, we only support 64K page size, * which means maximum compressed extent size (128K) is just 2x page * size. * This makes readahead less effective, so here disable readahead for * subpage for now, until full compressed write is supported.
 */ if (fs_info->sectorsize < PAGE_SIZE) return 0;
/* Beyond threshold, no need to continue */ if (sectors_missed > 4) break;
/* * Jump to next page start as we already have page for * current offset.
 */
cur += (folio_sz - offset); continue;
}
folio = filemap_alloc_folio(mapping_gfp_constraint(mapping,
~__GFP_FS), 0); if (!folio) break;
if (filemap_add_folio(mapping, folio, pg_index, GFP_NOFS)) { /* There is already a page, skip to page end */
cur += folio_size(folio);
folio_put(folio); continue;
}
if (!*memstall && folio_test_workingset(folio)) {
psi_memstall_enter(pflags);
*memstall = 1;
}
ret = set_folio_extent_mapped(folio); if (ret < 0) {
folio_unlock(folio);
folio_put(folio); break;
}
/* * At this point, we have a locked page in the page cache for * these bytes in the file. But, we have to make sure they map * to this compressed extent on disk.
 */ if (!em || cur < em->start ||
(cur + fs_info->sectorsize > btrfs_extent_map_end(em)) ||
(btrfs_extent_map_block_start(em) >> SECTOR_SHIFT) !=
orig_bio->bi_iter.bi_sector) {
btrfs_free_extent_map(em);
btrfs_unlock_extent(tree, cur, page_end, NULL);
folio_unlock(folio);
folio_put(folio); break;
}
add_size = min(em->start + em->len, page_end + 1) - cur;
btrfs_free_extent_map(em);
btrfs_unlock_extent(tree, cur, page_end, NULL);
if (folio_contains(folio, end_index)) {
size_t zero_offset = offset_in_folio(folio, isize);
if (zero_offset) { int zeros;
zeros = folio_size(folio) - zero_offset;
folio_zero_range(folio, zero_offset, zeros);
}
}
if (!bio_add_folio(orig_bio, folio, add_size,
offset_in_folio(folio, cur))) {
folio_unlock(folio);
folio_put(folio); break;
} /* * If it's subpage, we also need to increase its * subpage::readers number, as at endio we will decrease * subpage::readers and to unlock the page.
 */ if (fs_info->sectorsize < PAGE_SIZE)
btrfs_folio_set_lock(fs_info, folio, cur, add_size);
folio_put(folio);
cur += add_size;
} return 0;
}
/*
 * NOTE(review): truncated — the allocation of @cb, the use of the looked-up
 * extent map (em_len/em_start are declared but never assigned here), the
 * readahead call and the 'out' error label are all missing from this chunk.
 * Kept byte-identical pending repair.
 */
/* * for a compressed read, the bio we get passed has all the inode pages * in it. We don't actually do IO on those pages but allocate new ones * to hold the compressed pages on disk. * * bio->bi_iter.bi_sector points to the compressed extent on disk * bio->bi_io_vec points to all of the inode pages * * After the compressed pages are read, we copy the bytes into the * bio we were passed and then call the bio end_io calls
 */ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
{ struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_map_tree *em_tree = &inode->extent_tree; struct compressed_bio *cb; unsignedint compressed_len;
u64 file_offset = bbio->file_offset;
u64 em_len;
u64 em_start; struct extent_map *em; unsignedlong pflags; int memstall = 0;
blk_status_t status; int ret;
/* we need the actual starting offset of this extent in the file */
read_lock(&em_tree->lock);
em = btrfs_lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
read_unlock(&em_tree->lock); if (!em) {
status = BLK_STS_IOERR; goto out;
}
/* include any pages we added in add_ra-bio_pages */
cb->len = bbio->bio.bi_iter.bi_size;
cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector;
btrfs_add_compressed_bio_folios(cb);
/* * Heuristic uses systematic sampling to collect data from the input data * range, the logic can be tuned by the following constants: * * @SAMPLING_READ_SIZE - how many bytes will be copied from for each sample * @SAMPLING_INTERVAL - range from which the sampled data can be collected
*/ #define SAMPLING_READ_SIZE (16) #define SAMPLING_INTERVAL (256)
/* * For statistical analysis of the input data we consider bytes that form a * Galois Field of 256 objects. Each object has an attribute count, ie. how * many times the object appeared in the sample.
*/ #define BUCKET_SIZE (256)
/* * The size of the sample is based on a statistical sampling rule of thumb. * The common way is to perform sampling tests as long as the number of * elements in each cell is at least 5. * * Instead of 5, we choose 32 to obtain more accurate results. * If the data contain the maximum number of symbols, which is 256, we obtain a * sample size bound by 8192. * * For a sample of at most 8KB of data per data range: 16 consecutive bytes * from up to 512 locations.
*/ #define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED * \
SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
/* Occurrence counter for a single byte value in the heuristic sample. */
struct bucket_item {
	u32 count;
};
/* Per-workspace state for the compressibility heuristic. */
struct heuristic_ws {
	/* Partial copy of input data */
	u8 *sample;
	u32 sample_size;
	/* Buckets store counters for each byte value */
	struct bucket_item *bucket;
	/* Sorting buffer */
	struct bucket_item *bucket_b;
	struct list_head list;
};
staticconststruct btrfs_compress_op * const btrfs_compress_op[] = { /* The heuristic is represented as compression type 0 */
&btrfs_heuristic_compress,
&btrfs_zlib_compress,
&btrfs_lzo_compress,
&btrfs_zstd_compress,
};
staticstruct list_head *alloc_workspace(int type, int level)
{ switch (type) { case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(); case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level); case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(); case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level); default: /* * This can't happen, the type is validated several times * before we get here.
*/
BUG();
}
}
/*
 * Free a workspace previously obtained from alloc_workspace() for the
 * matching @type.
 */
static void free_workspace(int type, struct list_head *ws)
{
	switch (type) {
	case BTRFS_COMPRESS_NONE:
		return free_heuristic_ws(ws);
	case BTRFS_COMPRESS_ZLIB:
		return zlib_free_workspace(ws);
	case BTRFS_COMPRESS_LZO:
		return lzo_free_workspace(ws);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_free_workspace(ws);
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}
/*
 * NOTE(review): orphaned fragment — the header of the enclosing function
 * (a workspace-manager initializer providing 'type' and 'wsm') is missing
 * from this chunk.  Kept byte-identical pending repair.
 */
/* * Preallocate one workspace for each compression type so we can * guarantee forward progress in the worst case
 */
workspace = alloc_workspace(type, 0); if (IS_ERR(workspace)) {
btrfs_warn(NULL, "cannot preallocate compression workspace, will try later");
} else {
atomic_set(&wsm->total_ws, 1);
wsm->free_ws = 1;
list_add(workspace, &wsm->idle_ws);
}
}
/*
 * NOTE(review): truncated — the middle of this function is missing from
 * this chunk: the assignment of wsm/idle_ws/ws_lock/total_ws/ws_wait/
 * free_ws, the idle-list fast path, and the 'again:' label targeted by the
 * goto below.  Kept byte-identical pending repair.
 */
/* * This finds an available workspace or allocates a new one. * If it's not possible to allocate a new one, waits until there's one. * Preallocation makes a forward progress guarantees and we do not return * errors.
 */ struct list_head *btrfs_get_workspace(int type, int level)
{ struct workspace_manager *wsm; struct list_head *workspace; int cpus = num_online_cpus(); unsigned nofs_flag; struct list_head *idle_ws;
spinlock_t *ws_lock;
atomic_t *total_ws;
wait_queue_head_t *ws_wait; int *free_ws;
/* * Allocation helpers call vmalloc that can't use GFP_NOFS, so we have * to turn it off here because we might get called from the restricted * context of btrfs_compress_bio/btrfs_compress_pages
 */
nofs_flag = memalloc_nofs_save();
workspace = alloc_workspace(type, level);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(workspace)) {
atomic_dec(total_ws);
wake_up(ws_wait);
/* * Do not return the error but go back to waiting. There's a * workspace preallocated for each type and the compression * time is bounded so we get to a workspace eventually. This * makes our caller's life easier. * * To prevent silent and low-probability deadlocks (when the * initial preallocation fails), check if there are any * workspaces at all.
 */ if (atomic_read(total_ws) == 0) { static DEFINE_RATELIMIT_STATE(_rs, /* once per minute */ 60 * HZ, /* no burst */ 1);
if (__ratelimit(&_rs))
btrfs_warn(NULL, "no compression workspaces, low memory, retrying");
} goto again;
} return workspace;
}
staticstruct list_head *get_workspace(int type, int level)
{ switch (type) { case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level); case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level); case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(type, level); case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level); default: /* * This can't happen, the type is validated several times * before we get here.
*/
BUG();
}
}
/*
 * NOTE(review): truncated — only the comment, signature and local
 * declarations of btrfs_put_workspace() survive in this chunk; the body
 * (returning the workspace to the idle list or freeing it) is missing.
 * Kept byte-identical pending repair.
 */
/* * put a workspace struct back on the list or free it if we have enough * idle ones sitting around
 */ void btrfs_put_workspace(int type, struct list_head *ws)
{ struct workspace_manager *wsm; struct list_head *idle_ws;
spinlock_t *ws_lock;
atomic_t *total_ws;
wait_queue_head_t *ws_wait; int *free_ws;
/*
 * Return a workspace to its per-type manager; the mirror of
 * get_workspace().  Only zstd has a manager of its own here, the rest go
 * through the generic btrfs_put_workspace().
 */
static void put_workspace(int type, struct list_head *ws)
{
	switch (type) {
	case BTRFS_COMPRESS_NONE:
		return btrfs_put_workspace(type, ws);
	case BTRFS_COMPRESS_ZLIB:
		return btrfs_put_workspace(type, ws);
	case BTRFS_COMPRESS_LZO:
		return btrfs_put_workspace(type, ws);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_put_workspace(ws);
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}
/*
 * NOTE(review): truncated — only the opening of btrfs_compress_set_level()
 * is present; the clamping logic and return are missing from this chunk.
 */
/* * Adjust @level according to the limits of the compression algorithm or * fallback to default
 */ staticint btrfs_compress_set_level(unsignedint type, int level)
{ conststruct btrfs_compress_op *ops = btrfs_compress_op[type];
/*
 * NOTE(review): truncated — only the opening of
 * btrfs_compress_level_valid() is present; the range check and return are
 * missing from this chunk.
 */
/* * Check whether the @level is within the valid range for the given type.
 */ bool btrfs_compress_level_valid(unsignedint type, int level)
{ conststruct btrfs_compress_op *ops = btrfs_compress_op[type];
/*
 * NOTE(review): truncated — this chunk ends inside the IS_ERR() error
 * branch; the error message, the assignment to *in_folio_ret and the
 * return are missing.  Kept byte-identical pending repair.
 */
/* Wrapper around find_get_page(), with extra error message. */ int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start, struct folio **in_folio_ret)
{ struct folio *in_folio;
/* * The compressed write path should have the folio locked already, thus * we only need to grab one reference.
 */
in_folio = filemap_get_folio(mapping, start >> PAGE_SHIFT); if (IS_ERR(in_folio)) { struct btrfs_inode *inode = BTRFS_I(mapping->host);
/*
 * Given an address space and start and length, compress the bytes into @pages
 * that are allocated on demand.
 *
 * @type:       compression algorithm (level is passed separately, 0 means
 *              whatever default the algorithm chooses and is opaque here)
 *
 * @out_folios is an in/out parameter, holds maximum number of pages to
 * allocate and returns number of actually allocated pages
 *
 * @total_in is used to return the number of bytes actually read.  It
 * may be smaller than the input length if we had to exit early because we
 * ran out of room in the pages array or because we cross the
 * max_out threshold.
 *
 * @total_out is an in/out parameter, must be set to the input length and will
 * be also used to return the total number of compressed bytes
 */
int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
			  u64 start, struct folio **folios, unsigned long *out_folios,
			  unsigned long *total_in, unsigned long *total_out)
{
	const unsigned long orig_len = *total_out;
	struct list_head *workspace;
	int ret;

	level = btrfs_compress_set_level(type, level);
	workspace = get_workspace(type, level);
	ret = compression_compress_pages(type, workspace, mapping, start, folios,
					 out_folios, total_in, total_out);
	/* The total read-in bytes should be no larger than the input. */
	ASSERT(*total_in <= orig_len);
	put_workspace(type, workspace);
	return ret;
}
/*
 * Decompress a whole compressed bio using a per-type workspace.  On
 * success the remainder of the original bio is zero-filled (short
 * decompressed data maps to zeroes past EOF of the extent).
 */
static int btrfs_decompress_bio(struct compressed_bio *cb)
{
	struct list_head *workspace;
	int ret;
	int type = cb->compress_type;

	workspace = get_workspace(type, 0);
	ret = compression_decompress_bio(workspace, cb);
	put_workspace(type, workspace);

	if (!ret)
		zero_fill_bio(&cb->orig_bbio->bio);
	return ret;
}
/*
 * NOTE(review): truncated — this chunk ends after the sanity ASSERT; the
 * workspace acquisition, the compression_decompress() call and the return
 * are missing.  Kept byte-identical pending repair.
 */
/* * a less complex decompression routine. Our compressed data fits in a * single page, and we want to read a single page out of it. * start_byte tells us the offset into the compressed data we're interested in
 */ int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio, unsignedlong dest_pgoff, size_t srclen, size_t destlen)
{ struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio); struct list_head *workspace; const u32 sectorsize = fs_info->sectorsize; int ret;
/* * The full destination page range should not exceed the page size. * And the @destlen should not exceed sectorsize, as this is only called for * inline file extents, which should not exceed sectorsize.
 */
ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);
/*
 * Module-exit teardown of compression state: drain the compressed-page
 * pool and free its shrinker.
 * NOTE(review): truncated — the closing brace (and any further teardown)
 * is missing from this chunk.
 */
void __cold btrfs_exit_compress(void)
{ /* For now scan drains all pages and does not touch the parameters. */
btrfs_compr_pool_scan(NULL, NULL);
shrinker_free(compr_pool.shrinker);
/*
 * NOTE(review): truncated — the return expression computing the file
 * offset from @folio is missing from this chunk.  Kept byte-identical.
 */
/* * The bvec is a single page bvec from a bio that contains folios from a filemap. * * Since the folio may be a large one, and if the bv_page is not a head page of * a large folio, then page->index is unreliable. * * Thus we need this helper to grab the proper file offset.
 */ static u64 file_offset_from_bvec(conststruct bio_vec *bvec)
{ conststruct page *page = bvec->bv_page; conststruct folio *folio = page_folio(page);
/*
 * NOTE(review): truncated — the core of the copy loop is missing from this
 * chunk: the computation of copy_start/copy_len, the early-exit when the
 * bvec is past the buffer, and the memcpy into the bvec.  As extracted,
 * copy_len is used uninitialized below.  Kept byte-identical pending
 * repair.
 */
/* * Copy decompressed data from working buffer to pages. * * @buf: The decompressed data buffer * @buf_len: The decompressed data length * @decompressed: Number of bytes that are already decompressed inside the *               compressed extent * @cb: The compressed extent descriptor * @orig_bio: The original bio that the caller wants to read for * * An easier to understand graph is like below: * *                |<- orig_bio ->|     |<- orig_bio->| *        |<------- full decompressed extent ----->| *        |<----------- @cb range ---->| *        |                           |<-- @buf_len -->| *        |<--- @decompressed --->| * * Note that, @cb can be a subpage of the full decompressed extent, but * @cb->start always has the same as the orig_file_offset value of the full * decompressed extent. * * When reading compressed extent, we have to read the full compressed extent, * while @orig_bio may only want part of the range. * Thus this function will ensure only data covered by @orig_bio will be copied * to. * * Return 0 if we have copied all needed contents for @orig_bio. * Return >0 if we need continue decompress.
 */ int btrfs_decompress_buf2page(constchar *buf, u32 buf_len, struct compressed_bio *cb, u32 decompressed)
{ struct bio *orig_bio = &cb->orig_bbio->bio; /* Offset inside the full decompressed extent */
u32 cur_offset;
cur_offset = decompressed; /* The main loop to do the copy */ while (cur_offset < decompressed + buf_len) { struct bio_vec bvec;
size_t copy_len;
u32 copy_start; /* Offset inside the full decompressed extent */
u32 bvec_offset; void *kaddr;
bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter); /* * cb->start may underflow, but subtracting that value can still * give us correct offset inside the full decompressed extent.
 */
bvec_offset = file_offset_from_bvec(&bvec) - cb->start;
/* Haven't reached the bvec range, exit */ if (decompressed + buf_len <= bvec_offset) return 1;
cur_offset += copy_len;
bio_advance(orig_bio, copy_len); /* Finished the bio */ if (!orig_bio->bi_iter.bi_size) return 0;
} return 1;
}
/* * Shannon Entropy calculation * * Pure byte distribution analysis fails to determine compressibility of data. * Try calculating entropy to estimate the average minimum number of bits * needed to encode the sampled data. * * For convenience, return the percentage of needed bits, instead of amount of * bits directly. * * @ENTROPY_LVL_ACEPTABLE - below that threshold, sample has low byte entropy * and can be compressible with high probability * * @ENTROPY_LVL_HIGH - data are not compressible with high probability * * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
*/ #define ENTROPY_LVL_ACEPTABLE (65) #define ENTROPY_LVL_HIGH (80)
/* * For increasead precision in shannon_entropy calculation, * let's do pow(n, M) to save more digits after comma: * * - maximum int bit length is 64 * - ilog2(MAX_SAMPLE_SIZE) -> 13 * - 13 * 4 = 52 < 64 -> M = 4 * * So use pow(n, 4).
*/ staticinline u32 ilog2_w(u64 n)
{ return ilog2(n * n * n * n);
}
/*
 * Extract the 4-bit digit of @num at bit offset @shift, remapped so the
 * radix sort below ends up ordering counts in reverse (descending).
 */
static u8 get4bits(u64 num, int shift)
{
	u8 low4bits;

	num >>= shift;
	/* Reverse order */
	low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
	return low4bits;
}
/*
 * Use 4 bits as radix base
 * Use 16 u32 counters for calculating new position in buf array
 *
 * @array     - array that will be sorted
 * @array_buf - buffer array to store sorting results
 *              must be equal in size to @array
 * @num       - array size
 */
static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
		       int num)
{
	u64 max_num;
	u64 buf_num;
	u32 counters[COUNTERS_SIZE];
	u32 new_addr;
	u32 addr;
	int bitlen;
	int shift;
	int i;

	/*
	 * Try avoid useless loop iterations for small numbers stored in big
	 * counters.  Example: 48 33 4 ... in 64bit array
	 */
	max_num = array[0].count;
	for (i = 1; i < num; i++) {
		buf_num = array[i].count;
		if (buf_num > max_num)
			max_num = buf_num;
	}

	/*
	 * NOTE(review): the loop setup below was missing from the extracted
	 * chunk and has been restored to match the upstream implementation:
	 * round the significant bit length of the largest counter up to a
	 * whole number of double (forward + back) radix passes.
	 */
	buf_num = ilog2(max_num);
	bitlen = ALIGN(buf_num, RADIX_BASE * 2);

	shift = 0;
	while (shift < bitlen) {
		memset(counters, 0, sizeof(counters));

		/* Count how many elements fall into each radix bucket. */
		for (i = 0; i < num; i++) {
			buf_num = array[i].count;
			addr = get4bits(buf_num, shift);
			counters[addr]++;
		}

		for (i = 1; i < COUNTERS_SIZE; i++)
			counters[i] += counters[i - 1];

		/* Stable scatter from @array into @array_buf. */
		for (i = num - 1; i >= 0; i--) {
			buf_num = array[i].count;
			addr = get4bits(buf_num, shift);
			counters[addr]--;
			new_addr = counters[addr];
			array_buf[new_addr] = array[i];
		}

		shift += RADIX_BASE;

		/*
		 * Normal radix expects to move data from a temporary array, to
		 * the main one.  But that requires some CPU time. Avoid that
		 * by doing another sort iteration to original array instead of
		 * memcpy()
		 */
		memset(counters, 0, sizeof(counters));

		for (i = 0; i < num; i++) {
			buf_num = array_buf[i].count;
			addr = get4bits(buf_num, shift);
			counters[addr]++;
		}

		for (i = 1; i < COUNTERS_SIZE; i++)
			counters[i] += counters[i - 1];

		/* Back pass: scatter from @array_buf into @array. */
		for (i = num - 1; i >= 0; i--) {
			buf_num = array_buf[i].count;
			addr = get4bits(buf_num, shift);
			counters[addr]--;
			new_addr = counters[addr];
			array[new_addr] = array_buf[i];
		}

		shift += RADIX_BASE;
	}
}
/*
 * NOTE(review): truncated — the function header of byte_core_set_size()
 * and the declarations of i/coreset_sum/core_set_threshold/bucket are
 * missing from this chunk.  Kept byte-identical pending repair.
 */
/* * Size of the core byte set - how many bytes cover 90% of the sample * * There are several types of structured binary data that use nearly all byte * values. The distribution can be uniform and counts in all buckets will be * nearly the same (eg. encrypted data). Unlikely to be compressible. * * Other possibility is normal (Gaussian) distribution, where the data could * be potentially compressible, but we have to take a few more steps to decide * how much. * * @BYTE_CORE_SET_LOW - main part of byte values repeated frequently, *                      compression algo can easy fix that * @BYTE_CORE_SET_HIGH - data have uniform distribution and with high *                       probability is not compressible
 */ #define BYTE_CORE_SET_LOW (64) #define BYTE_CORE_SET_HIGH (200)
/* Sort in reverse order */
radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);
for (i = 0; i < BYTE_CORE_SET_LOW; i++)
coreset_sum += bucket[i].count;
if (coreset_sum > core_set_threshold) return i;
for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) {
coreset_sum += bucket[i].count; if (coreset_sum > core_set_threshold) break;
}
return i;
}
/*
 * NOTE(review): truncated — the function header of byte_set_size(), the
 * declarations of i/byte_set_size, and the final return after the second
 * loop are missing from this chunk.  Kept byte-identical pending repair.
 */
/* * Count byte values in buckets. * This heuristic can detect textual data (configs, xml, json, html, etc). * Because in most text-like data byte set is restricted to limited number of * possible characters, and that restriction in most cases makes data easy to * compress. * * @BYTE_SET_THRESHOLD - consider all data within this byte set size: *	less - compressible *	more - need additional analysis
 */ #define BYTE_SET_THRESHOLD (64)
for (i = 0; i < BYTE_SET_THRESHOLD; i++) { if (ws->bucket[i].count > 0)
byte_set_size++;
}
/* * Continue collecting count of byte values in buckets. If the byte * set size is bigger then the threshold, it's pointless to continue, * the detection technique would fail for this type of data.
 */ for (; i < BUCKET_SIZE; i++) { if (ws->bucket[i].count > 0) {
byte_set_size++; if (byte_set_size > BYTE_SET_THRESHOLD) return byte_set_size;
}
}
/*
 * NOTE(review): truncated — the header of the sample-collection function
 * (providing inode/start/end/ws and the locals index/index_end/page/
 * in_data/i/curr_sample_pos) is missing from this chunk.  Kept
 * byte-identical pending repair.
 */
/* * Compression handles the input data by chunks of 128KiB * (defined by BTRFS_MAX_UNCOMPRESSED) * * We do the same for the heuristic and loop over the whole range. * * MAX_SAMPLE_SIZE - calculated under assumption that heuristic will * process no more than BTRFS_MAX_UNCOMPRESSED at a time.
 */ if (end - start > BTRFS_MAX_UNCOMPRESSED)
end = start + BTRFS_MAX_UNCOMPRESSED;
index = start >> PAGE_SHIFT;
index_end = end >> PAGE_SHIFT;
/* Don't miss unaligned end */ if (!PAGE_ALIGNED(end))
index_end++;
curr_sample_pos = 0; while (index < index_end) {
page = find_get_page(inode->i_mapping, index);
in_data = kmap_local_page(page); /* Handle case where the start is not aligned to PAGE_SIZE */
i = start % PAGE_SIZE; while (i < PAGE_SIZE - SAMPLING_READ_SIZE) { /* Don't sample any garbage from the last page */ if (start > end - SAMPLING_READ_SIZE) break;
memcpy(&ws->sample[curr_sample_pos], &in_data[i],
SAMPLING_READ_SIZE);
i += SAMPLING_INTERVAL;
start += SAMPLING_INTERVAL;
curr_sample_pos += SAMPLING_READ_SIZE;
}
kunmap_local(in_data);
put_page(page);
index++;
}
ws->sample_size = curr_sample_pos;
}
/*
 * NOTE(review): truncated — the steps between the declarations and the
 * histogram loop are missing from this chunk: 'ws' is never derived from
 * 'ws_list', and the sample collection / bucket reset that must precede
 * the counting loop is absent.  Kept byte-identical pending repair.
 */
/* * Compression heuristic. * * The following types of analysis can be performed: * - detect mostly zero data * - detect data with low "byte set" size (text, etc) * - detect data with low/high "core byte" set * * Return non-zero if the compression should be done, 0 otherwise.
 */ int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
{ struct list_head *ws_list = get_workspace(0, 0); struct heuristic_ws *ws;
u32 i;
u8 byte; int ret = 0;
for (i = 0; i < ws->sample_size; i++) {
byte = ws->sample[i];
ws->bucket[byte].count++;
}
i = byte_set_size(ws); if (i < BYTE_SET_THRESHOLD) {
ret = 2; goto out;
}
i = byte_core_set_size(ws); if (i <= BYTE_CORE_SET_LOW) {
ret = 3; goto out;
}
if (i >= BYTE_CORE_SET_HIGH) {
ret = 0; goto out;
}
i = shannon_entropy(ws); if (i <= ENTROPY_LVL_ACEPTABLE) {
ret = 4; goto out;
}
/* * For the levels below ENTROPY_LVL_HIGH, additional analysis would be * needed to give green light to compression. * * For now just assume that compression at that level is not worth the * resources because: * * 1. it is possible to defrag the data later * * 2. the data would turn out to be hardly compressible, eg. 150 byte * values, every bucket has counter at level ~54. The heuristic would * be confused. This can happen when data have some internal repeated * patterns like "abbacbbc...". This can be detected by analyzing * pairs of bytes, which is too costly.
 */ if (i < ENTROPY_LVL_HIGH) {
ret = 5; goto out;
} else {
ret = 0; goto out;
}
out:
put_workspace(0, ws_list); return ret;
}
/*
 * NOTE(review): truncated — the tail of btrfs_compress_str2level() (the
 * clamping of the parsed level and the final assignment/return) is missing
 * from this chunk.  Kept byte-identical pending repair.
 */
/* * Convert the compression suffix (eg. after "zlib" starting with ":") to level. * * If the resulting level exceeds the algo's supported levels, it will be clamped. * * Return <0 if no valid string can be found. * Return 0 if everything is fine.
 */ int btrfs_compress_str2level(unsignedint type, constchar *str, int *level_ret)
{ int level = 0; int ret;
if (!type) {
*level_ret = btrfs_compress_set_level(type, level); return 0;
}
if (str[0] == ':') {
ret = kstrtoint(str + 1, 10, &level); if (ret) return ret;
}
/*
 * NOTE(review): the text below is an extraneous German web-page disclaimer
 * ("the information on this website was compiled to the best of our
 * knowledge; no guarantee of completeness, correctness or quality; the
 * syntax highlighting and measurement are still experimental") that was
 * accidentally appended to this file.  It is not part of the code and
 * should be removed; it is preserved here, commented out, until that
 * removal is confirmed:
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder
 * Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten
 * Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */