// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is the VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests. The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */
/* Module metadata: author, description, load aliases and license. */
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");
staticbool __read_mostly vmwballoon_shrinker_enable;
module_param(vmwballoon_shrinker_enable, bool, 0444);
MODULE_PARM_DESC(vmwballoon_shrinker_enable, "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance.");
/* Delay in seconds after shrink before inflation. */
#define VMBALLOON_SHRINK_DELAY		(5)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/* Magic number for the balloon mount-point */
#define BALLOON_VMW_MAGIC		0x0ba11007

/*
 * Hypervisor communication port definitions.
 *
 * Each macro must live on its own line: a '#define' appearing inside the
 * replacement list of another macro is not processed as a directive.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */
enum vmwballoon_capabilities {
	/*
	 * Capability bit-mask values. Bit 0 is reserved and is not associated
	 * with any capability, so the first usable value is bit 1.
	 */
	VMW_BALLOON_BASIC_CMDS			= 0x02,	/* bit 1 */
	VMW_BALLOON_BATCHED_CMDS		= 0x04,	/* bit 2 */
	VMW_BALLOON_BATCHED_2M_CMDS		= 0x08,	/* bit 3 */
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= 0x10,	/* bit 4 */
	VMW_BALLOON_64_BIT_TARGET		= 0x20	/* bit 5 */
};
/**
 * enum vmballoon_cmd_type - backdoor commands.
 *
 * Which commands may be used depends on the capabilities the host reports:
 *
 * - %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
 *   %VMW_BALLOON_CMD_GUEST_ID are always available.
 * - %VMW_BALLOON_BASIC_CMDS enables %VMW_BALLOON_CMD_LOCK and
 *   %VMW_BALLOON_CMD_UNLOCK.
 * - %VMW_BALLOON_BATCHED_CMDS enables %VMW_BALLOON_CMD_BATCHED_LOCK and
 *   %VMW_BALLOON_CMD_BATCHED_UNLOCK.
 * - %VMW_BALLOON_BATCHED_2M_CMDS enables %VMW_BALLOON_CMD_BATCHED_2M_LOCK and
 *   %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK.
 * - %VMW_BALLOON_SIGNALLED_WAKEUP_CMD enables
 *   %VMW_BALLOON_CMD_VMCI_DOORBELL_SET.
 *
 * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor.
 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
 *			    to be deflated from the balloon.
 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
 *			      runs in the VM.
 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
 *				  ballooned pages (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
 *				    pages that are about to be deflated from the
 *				    balloon (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
 *				     for 2MB pages.
 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
 *				       @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
 *				       pages.
 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
 *				       that would be invoked when the balloon
 *				       size changes.
 * @VMW_BALLOON_CMD_LAST: Value of the last command.
 */
enum vmballoon_cmd_type {
	VMW_BALLOON_CMD_START			= 0,
	VMW_BALLOON_CMD_GET_TARGET		= 1,
	VMW_BALLOON_CMD_LOCK			= 2,
	VMW_BALLOON_CMD_UNLOCK			= 3,
	VMW_BALLOON_CMD_GUEST_ID		= 4,
	/* The value 5 is not assigned to any command. */
	VMW_BALLOON_CMD_BATCHED_LOCK		= 6,
	VMW_BALLOON_CMD_BATCHED_UNLOCK		= 7,
	VMW_BALLOON_CMD_BATCHED_2M_LOCK		= 8,
	VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	= 9,
	VMW_BALLOON_CMD_VMCI_DOORBELL_SET	= 10,
	VMW_BALLOON_CMD_LAST			= VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
};
/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 *
 * The bit-field widths add up to 5 + (PAGE_SHIFT - 5) + 52 bits, which is
 * exactly 64 bits when PAGE_SHIFT is 12.
 * NOTE(review): presumably the layout is dictated by the host-guest protocol;
 * confirm before changing field widths or order.
 */
struct vmballoon_batch_entry {
	u64 status : 5;
	u64 reserved : PAGE_SHIFT - 5;
	u64 pfn : 52;
} __packed;
struct vmballoon { /** * @max_page_size: maximum supported page size for ballooning. * * Protected by @conf_sem
*/ enum vmballoon_page_size_type max_page_size;
/** * @size: balloon actual size in basic page size (frames). * * While we currently do not support size which is bigger than 32-bit, * in preparation for future support, use 64-bits.
*/
atomic64_t size;
/** * @target: balloon target size in basic page size (frames). * * We do not protect the target under the assumption that setting the * value is always done through a single write. If this assumption ever * breaks, we would have to use X_ONCE for accesses, and suffer the less * optimized code. Although we may read stale target value if multiple * accesses happen at once, the performance impact should be minor.
*/ unsignedlong target;
/** * @reset_required: reset flag * * Setting this flag may introduce races, but the code is expected to * handle them gracefully. In the worst case, another operation will * fail as reset did not take place. Clearing the flag is done while * holding @conf_sem for write.
*/ bool reset_required;
/** * @batch_page: pointer to communication batch page. * * When batching is used, batch_page points to a page, which holds up to * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
*/ struct vmballoon_batch_entry *batch_page;
	/**
	 * @batch_max_pages: maximum pages that can be locked/unlocked.
	 *
	 * Indicates the number of pages that the hypervisor can lock or unlock
	 * at once, according to whether batching is enabled. If batching is
	 * disabled, only a single page can be locked/unlocked on each
	 * operation.
	 *
	 * Protected by @conf_sem.
*/ unsignedint batch_max_pages;
/** * @page: page to be locked/unlocked by the hypervisor * * @page is only used when batching is disabled and a single page is * reclaimed on each iteration. * * Protected by @comm_lock.
*/ struct page *page;
	/**
	 * @shrink_timeout: timeout until the next inflation.
	 *
	 * After a shrink event, indicates the time in jiffies after which
	 * inflation is allowed again. Can be written concurrently with reads,
	 * so must use READ_ONCE/WRITE_ONCE when accessing.
*/ unsignedlong shrink_timeout;
/* * Send "start" command to the host, communicating supported version * of the protocol.
*/ staticint vmballoon_send_start(struct vmballoon *b, unsignedlong req_caps)
{ unsignedlong status, capabilities;
status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
&capabilities);
switch (status) { case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
b->capabilities = capabilities; break; case VMW_BALLOON_SUCCESS:
b->capabilities = VMW_BALLOON_BASIC_CMDS; break; default: return -EIO;
}
/* * 2MB pages are only supported with batching. If batching is for some * reason disabled, do not use 2MB pages, since otherwise the legacy * mechanism is used with 2MB pages, causing a failure.
*/
b->max_page_size = VMW_BALLOON_4K_PAGE; if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
(b->capabilities & VMW_BALLOON_BATCHED_CMDS))
b->max_page_size = VMW_BALLOON_2M_PAGE;
return 0;
}
/**
 * vmballoon_send_guest_id - communicate guest type to the host.
 *
 * @b: pointer to the balloon.
 *
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 *
 * Return: zero on success, -EIO if the host did not report success.
 */
static int vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
			       VMW_BALLOON_GUEST_ID, 0);

	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
/** * vmballoon_page_order() - return the order of the page * @page_size: the size of the page. * * Return: the allocation order.
*/ staticinline unsignedint vmballoon_page_order(enum vmballoon_page_size_type page_size)
{ return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
}
/**
 * vmballoon_page_in_frames() - returns the number of frames in a page.
 * @page_size: the size of the page.
 *
 * Return: the number of 4k frames, i.e. two to the power of the allocation
 * order reported by vmballoon_page_order().
 */
static inline unsigned int
vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
{
	return 1U << vmballoon_page_order(page_size);
}
/**
 * vmballoon_mark_page_offline() - mark a page as offline
 * @page: pointer for the page.
 * @page_size: the size of the page.
 *
 * Sets the offline flag on every basic-size frame that makes up @page.
 */
static void
vmballoon_mark_page_offline(struct page *page,
			    enum vmballoon_page_size_type page_size)
{
	/* Unsigned index: the bound is an unsigned frame count. */
	unsigned int i;

	for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
		__SetPageOffline(page + i);
}
/**
 * vmballoon_mark_page_online() - mark a page as online
 * @page: pointer for the page.
 * @page_size: the size of the page.
 *
 * Clears the offline flag on every basic-size frame that makes up @page.
 */
static void
vmballoon_mark_page_online(struct page *page,
			   enum vmballoon_page_size_type page_size)
{
	/* Unsigned index: the bound is an unsigned frame count. */
	unsigned int i;

	for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
		__ClearPageOffline(page + i);
}
/**
 * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
 *
 * @b: pointer to the balloon.
 *
 * The total number of RAM pages is passed to the host as the limit.
 *
 * Return: zero on success, -EINVAL if the limit does not fit in 32-bit while
 * the host lacks %VMW_BALLOON_64_BIT_TARGET, and -EIO if an error occurred in
 * communicating with the host.
 */
static int vmballoon_send_get_target(struct vmballoon *b)
{
	unsigned long status;
	unsigned long limit;

	limit = totalram_pages();

	/* Ensure limit fits in 32-bits if 64-bit targets are not supported */
	if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) &&
	    limit != (u32)limit)
		return -EINVAL;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);

	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
/** * vmballoon_alloc_page_list - allocates a list of pages. * * @b: pointer to the balloon. * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. * @req_n_pages: the number of requested pages. * * Tries to allocate @req_n_pages. Add them to the list of balloon pages in * @ctl.pages and updates @ctl.n_pages to reflect the number of pages. * * Return: zero on success or error code otherwise.
*/ staticint vmballoon_alloc_page_list(struct vmballoon *b, struct vmballoon_ctl *ctl, unsignedint req_n_pages)
{ struct page *page; unsignedint i;
for (i = 0; i < req_n_pages; i++) { /* * First check if we happen to have pages that were allocated * before. This happens when 2MB page rejected during inflation * by the hypervisor, and then split into 4KB pages.
*/ if (!list_empty(&ctl->prealloc_pages)) {
page = list_first_entry(&ctl->prealloc_pages, struct page, lru);
list_del(&page->lru);
} else { if (ctl->page_size == VMW_BALLOON_2M_PAGE)
page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
__GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER); else
page = balloon_page_alloc();
/** * vmballoon_handle_one_result - Handle lock/unlock result for a single page. * * @b: pointer for %struct vmballoon. * @page: pointer for the page whose result should be handled. * @page_size: size of the page. * @status: status of the operation as provided by the hypervisor.
*/ staticint vmballoon_handle_one_result(struct vmballoon *b, struct page *page, enum vmballoon_page_size_type page_size, unsignedlong status)
{ /* On success do nothing. The page is already on the balloon list. */ if (likely(status == VMW_BALLOON_SUCCESS)) return 0;
pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
page_to_pfn(page), status,
vmballoon_page_size_names[page_size]);
/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation for
 * an individual page. Provides the page that the operation was performed on
 * through the @p argument. In batching mode both the page and the status are
 * taken from entry @idx of @b->batch_page; otherwise the page is @b->page.
 *
 * Return: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
					   struct page **p)
{
	if (static_branch_likely(&vmw_balloon_batching)) {
		/* batching mode */
		*p = pfn_to_page(b->batch_page[idx].pfn);
		return b->batch_page[idx].status;
	}

	/* non-batching mode */
	*p = b->page;

	/*
	 * If a failure occurs, the indication will be provided in the status
	 * of the entire operation, which is considered before the individual
	 * page status. So for non-batching mode, the indication is always of
	 * success.
	 */
	return VMW_BALLOON_SUCCESS;
}
/** * vmballoon_lock_op - notifies the host about inflated/deflated pages. * @b: pointer to the balloon. * @num_pages: number of inflated/deflated pages. * @page_size: size of the page. * @op: the type of operation (lock or unlock). * * Notify the host about page(s) that were ballooned (or removed from the * balloon) so that host can use it without fear that guest will need it (or * stop using them since the VM does). Host may reject some pages, we need to * check the return value and maybe submit a different page. The pages that are * inflated/deflated are pointed by @b->page. * * Return: result as provided by the hypervisor.
*/ staticunsignedlong vmballoon_lock_op(struct vmballoon *b, unsignedint num_pages, enum vmballoon_page_size_type page_size, enum vmballoon_op op)
{ unsignedlong cmd, pfn;
/* In non-batching mode, PFNs must fit in 32-bit */ if (unlikely(pfn != (u32)pfn)) return VMW_BALLOON_ERROR_PPN_INVALID;
}
return vmballoon_cmd(b, cmd, pfn, num_pages);
}
/** * vmballoon_add_page - adds a page towards lock/unlock operation. * * @b: pointer to the balloon. * @idx: index of the page to be ballooned in this batch. * @p: pointer to the page that is about to be ballooned. * * Adds the page to be ballooned. Must be called while holding @comm_lock.
*/ staticvoid vmballoon_add_page(struct vmballoon *b, unsignedint idx, struct page *p)
{
lockdep_assert_held(&b->comm_lock);
/**
 * vmballoon_lock - lock or unlock a batch of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 *
 * Notifies the host about ballooned pages (after inflation or deflation,
 * according to @ctl). If the host rejects a page, put it on the
 * @ctl refuse list. These refused pages are then released when moving to the
 * next size of pages.
 *
 * Note that we neither free any @page here nor put them back on the ballooned
 * pages list. Instead we queue it for later processing. We do that for several
 * reasons. First, we do not want to free the page under the lock. Second, it
 * allows us to unify the handling of lock and unlock. In the inflate case, the
 * caller will check if there are too many refused pages and release them.
 * Although it is not identical to the past behavior, it should not affect
 * performance.
*/ staticint vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
{ unsignedlong batch_status; struct page *page; unsignedint i, num_pages;
num_pages = ctl->n_pages; if (num_pages == 0) return 0;
/* communication with the host is done under the communication lock */
spin_lock(&b->comm_lock);
i = 0;
list_for_each_entry(page, &ctl->pages, lru)
vmballoon_add_page(b, i++, page);
/* * Iterate over the pages in the provided list. Since we are changing * @ctl->n_pages we are saving the original value in @num_pages and * use this value to bound the loop.
*/ for (i = 0; i < num_pages; i++) { unsignedlong status;
status = vmballoon_status_page(b, i, &page);
/* * Failure of the whole batch overrides a single operation * results.
*/ if (batch_status != VMW_BALLOON_SUCCESS)
status = batch_status;
/* Continue if no error happened */ if (!vmballoon_handle_one_result(b, page, ctl->page_size,
status)) continue;
/* * Error happened. Move the pages to the refused list and update * the pages number.
*/
list_move(&page->lru, &ctl->refused_pages);
ctl->n_pages--;
ctl->n_refused_pages++;
}
/** * vmballoon_release_page_list() - Releases a page list * * @page_list: list of pages to release. * @n_pages: pointer to the number of pages. * @page_size: whether the pages in the list are 2MB (or else 4KB). * * Releases the list of pages and zeros the number of pages.
*/ staticvoid vmballoon_release_page_list(struct list_head *page_list, int *n_pages, enum vmballoon_page_size_type page_size)
{ struct page *page, *tmp;
/* * Release pages that were allocated while attempting to inflate the * balloon but were refused by the host for one reason or another.
*/ staticvoid vmballoon_release_refused_pages(struct vmballoon *b, struct vmballoon_ctl *ctl)
{
vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
ctl->page_size);
/**
 * vmballoon_change - retrieve the required balloon change
 *
 * @b: pointer for the balloon.
 *
 * Return: the required change for the balloon size. A positive number
 * indicates inflation, a negative number indicates a deflation. Zero is
 * returned when a reset is pending, when a deflation would be smaller than one
 * 2MB page (unless the target is zero), or when inflation is still blocked by
 * @b->shrink_timeout.
 */
static int64_t vmballoon_change(struct vmballoon *b)
{
	int64_t size, target;

	/*
	 * We must cast first because of int sizes
	 * Otherwise we might get huge positives instead of negatives
	 *
	 * Both values are therefore loaded into signed 64-bit locals before
	 * any comparison or subtraction takes place.
	 */
	size = atomic64_read(&b->size);
	target = READ_ONCE(b->target);

	if (b->reset_required)
		return 0;

	/* consider a 2MB slack on deflate, unless the balloon is emptied */
	if (target < size && target != 0 &&
	    size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
		return 0;

	/* If an out-of-memory recently occurred, inflation is disallowed. */
	if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout)))
		return 0;

	return target - size;
}
/**
 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
 *
 * @b: pointer to balloon.
 * @pages: list of pages to enqueue.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 *
 * Enqueues the provided list of pages in the ballooned page list, clears the
 * list and zeroes the number of pages that was provided.
*/ staticvoid vmballoon_enqueue_page_list(struct vmballoon *b, struct list_head *pages, unsignedint *n_pages, enum vmballoon_page_size_type page_size)
{ unsignedlong flags; struct page *page;
if (page_size == VMW_BALLOON_4K_PAGE) {
balloon_page_list_enqueue(&b->b_dev_info, pages);
} else { /* * Keep the huge pages in a local list which is not available * for the balloon compaction mechanism.
*/
spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
/** * vmballoon_dequeue_page_list() - Dequeues page lists for deflation. * * @b: pointer to balloon. * @pages: list of pages to enqueue. * @n_pages: pointer to number of pages in list. The value is zeroed. * @page_size: whether the pages are 2MB or 4KB pages. * @n_req_pages: the number of requested pages. * * Dequeues the number of requested pages from the balloon for deflation. The * number of dequeued pages may be lower, if not enough pages in the requested * size are available.
*/ staticvoid vmballoon_dequeue_page_list(struct vmballoon *b, struct list_head *pages, unsignedint *n_pages, enum vmballoon_page_size_type page_size, unsignedint n_req_pages)
{ struct page *page, *tmp; unsignedint i = 0; unsignedlong flags;
/* In the case of 4k pages, use the compaction infrastructure */ if (page_size == VMW_BALLOON_4K_PAGE) {
*n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
n_req_pages); return;
}
list_move(&page->lru, pages); if (++i == n_req_pages) break;
}
__count_vm_events(BALLOON_DEFLATE,
i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
*n_pages = i;
}
/**
 * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
 *
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 *
 * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
 * due to one or few 4KB pages. These 2MB pages may keep being allocated and
 * then being refused. To prevent this case, this function splits the refused
 * pages into 4KB pages and adds them into @prealloc_pages list.
 *
 * On return the refused list is empty and @ctl->n_refused_pages is zero.
 */
static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
{
	struct page *page, *tmp;
	unsigned int i, order;

	order = vmballoon_page_order(ctl->page_size);

	list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
		list_del(&page->lru);
		split_page(page, order);
		/* Queue every resulting basic-size page for later reuse. */
		for (i = 0; i < (1U << order); i++)
			list_add(&page[i].lru, &ctl->prealloc_pages);
	}
	ctl->n_refused_pages = 0;
}
/** * vmballoon_inflate() - Inflate the balloon towards its target size. * * @b: pointer to the balloon.
*/ staticvoid vmballoon_inflate(struct vmballoon *b)
{
int64_t to_inflate_frames; struct vmballoon_ctl ctl = {
.pages = LIST_HEAD_INIT(ctl.pages),
.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
.prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages),
.page_size = b->max_page_size,
.op = VMW_BALLOON_INFLATE
};
while ((to_inflate_frames = vmballoon_change(b)) > 0) { unsignedint to_inflate_pages, page_in_frames; int alloc_error, lock_error = 0;
/* * If allocation failed or the number of refused pages exceeds * the maximum allowed, move to the next page size.
*/ if (alloc_error ||
ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) { if (ctl.page_size == VMW_BALLOON_4K_PAGE) break;
/* * Split the refused pages to 4k. This will also empty * the refused pages list.
*/
vmballoon_split_refused_pages(&ctl);
ctl.page_size--;
}
cond_resched();
}
/* * Release pages that were allocated while attempting to inflate the * balloon but were refused by the host for one reason or another, * and update the statistics.
*/ if (ctl.n_refused_pages != 0)
vmballoon_release_refused_pages(b, &ctl);
/**
 * vmballoon_deflate() - Decrease the size of the balloon.
 *
 * @b: pointer to the balloon
 * @n_frames: the number of frames to deflate. If zero, automatically
 * calculated according to the target size.
 * @coordinated: whether to coordinate with the host
 *
 * Decrease the size of the balloon allowing guest to use more memory.
 * Deflation starts with 4KB pages and moves to the next page size, up to
 * @b->max_page_size, whenever fewer pages than requested could be deflated.
 *
 * Return: The number of deflated frames (i.e., basic page size units)
 */
static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
				       bool coordinated)
{
	unsigned long deflated_frames = 0;
	unsigned long tried_frames = 0;
	struct vmballoon_ctl ctl = {
		.pages = LIST_HEAD_INIT(ctl.pages),
		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
		.page_size = VMW_BALLOON_4K_PAGE,
		.op = VMW_BALLOON_DEFLATE
	};

	/* free pages to reach target */
	while (true) {
		unsigned int to_deflate_pages, n_unlocked_frames;
		unsigned int page_in_frames;
		int64_t to_deflate_frames;
		bool deflated_all;

		/*
		 * The page size may change between iterations, so the number
		 * of frames per page must be recomputed before it is used as
		 * a divisor and multiplier below.
		 */
		page_in_frames = vmballoon_page_in_frames(ctl.page_size);

		/*
		 * If we were requested a specific number of frames, we try to
		 * deflate this number of frames. Otherwise, deflation is
		 * performed according to the target and balloon size.
		 */
		to_deflate_frames = n_frames ? n_frames - tried_frames :
					       -vmballoon_change(b);

		/* break if no work to do */
		if (to_deflate_frames <= 0)
			break;

		/*
		 * Calculate the number of frames based on current page size,
		 * but limit the deflated frames to a single chunk
		 */
		to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
					 DIV_ROUND_UP_ULL(to_deflate_frames,
							  page_in_frames));

		/* First take the pages from the balloon pages. */
		vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
					    ctl.page_size, to_deflate_pages);

		/*
		 * Before pages are moving to the refused list, count their
		 * frames as frames that we tried to deflate.
		 */
		tried_frames += ctl.n_pages * page_in_frames;

		/*
		 * Unlock the pages by communicating with the hypervisor if the
		 * communication is coordinated (i.e., not pop). We ignore the
		 * return code. Instead we check whether we managed to unlock
		 * all the pages. If we failed, we will move to the next page
		 * size, and would eventually try again later.
		 */
		if (coordinated)
			vmballoon_lock(b, &ctl);

		/*
		 * Check if we deflated enough. We will move to the next page
		 * size if we did not manage to do so. This calculation takes
		 * place now, as once the pages are released, the number of
		 * pages is zeroed.
		 */
		deflated_all = (ctl.n_pages == to_deflate_pages);

		/* Update local and global counters */
		n_unlocked_frames = ctl.n_pages * page_in_frames;
		atomic64_sub(n_unlocked_frames, &b->size);
		deflated_frames += n_unlocked_frames;

		/* free the ballooned pages */
		vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
					    ctl.page_size);

		/* Return the refused pages to the ballooned list. */
		vmballoon_enqueue_page_list(b, &ctl.refused_pages,
					    &ctl.n_refused_pages,
					    ctl.page_size);

		/* If we failed to unlock all the pages, move to next size. */
		if (!deflated_all) {
			if (ctl.page_size == b->max_page_size)
				break;
			ctl.page_size++;
		}

		cond_resched();
	}

	return deflated_frames;
}
/**
 * vmballoon_deinit_batching - disables batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Disables batching, by deallocating the page for communication with the
 * hypervisor and disabling the static key to indicate that batching is off.
 * Afterwards a single page is processed per operation
 * (@b->batch_max_pages is set to 1).
 */
static void vmballoon_deinit_batching(struct vmballoon *b)
{
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
	static_branch_disable(&vmw_balloon_batching);
	b->batch_max_pages = 1;
}
/** * vmballoon_init_batching - enable batching mode. * * @b: pointer to &struct vmballoon. * * Enables batching, by allocating a page for communication with the hypervisor * and enabling the static_key to use batching. * * Return: zero on success or an appropriate error-code.
*/ staticint vmballoon_init_batching(struct vmballoon *b)
{ struct page *page;
page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) return -ENOMEM;
if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
vmci_doorbell_destroy(b->vmci_doorbell);
b->vmci_doorbell = VMCI_INVALID_HANDLE;
}
}
/** * vmballoon_vmci_init - Initialize vmci doorbell. * * @b: pointer to the balloon. * * Return: zero on success or when wakeup command not supported. Error-code * otherwise. * * Initialize vmci doorbell, to get notified as soon as balloon changes.
*/ staticint vmballoon_vmci_init(struct vmballoon *b)
{ unsignedlong error;
if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) return 0;
/**
 * vmballoon_pop - Quickly release all pages allocated for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * This function is called when host decides to "reset" balloon for one reason
 * or another. Unlike normal "deflate" we do not (shall not) notify host of the
 * pages being released, hence the uncoordinated deflation. Deflation is
 * repeated until the balloon size drops to zero.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	unsigned long size;

	while ((size = atomic64_read(&b->size)))
		vmballoon_deflate(b, size, false);
}
/* * Perform standard reset sequence by popping the balloon (in case it * is not empty) and then restarting protocol. This operation normally * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
*/ staticvoid vmballoon_reset(struct vmballoon *b)
{ int error;
down_write(&b->conf_sem);
vmballoon_vmci_cleanup(b);
/* free all pages, skipping monitor unlock */
vmballoon_pop(b);
if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) goto unlock;
if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { if (vmballoon_init_batching(b)) { /* * We failed to initialize batching, inform the monitor * about it by sending a null capability. * * The guest will retry in one second.
*/
vmballoon_send_start(b, 0); goto unlock;
}
} elseif ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
vmballoon_deinit_batching(b);
}
error = vmballoon_vmci_init(b); if (error)
pr_err_once("failed to initialize vmci doorbell\n");
if (vmballoon_send_guest_id(b))
pr_err_once("failed to send guest ID to the host\n");
unlock:
up_write(&b->conf_sem);
}
/** * vmballoon_work - periodic balloon worker for reset, inflation and deflation. * * @work: pointer to the &work_struct which is provided by the workqueue. * * Resets the protocol if needed, gets the new size and adjusts balloon as * needed. Repeat in 1 sec.
*/ staticvoid vmballoon_work(struct work_struct *work)
{ struct delayed_work *dwork = to_delayed_work(work); struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
int64_t change = 0;
if (b->reset_required)
vmballoon_reset(b);
down_read(&b->conf_sem);
/* * Update the stats while holding the semaphore to ensure that * @stats_enabled is consistent with whether the stats are actually * enabled
*/
vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);
if (!vmballoon_send_get_target(b))
change = vmballoon_change(b);
/* * We are using a freezable workqueue so that balloon operations are * stopped while the system transitions to/from sleep/hibernation.
*/
queue_delayed_work(system_freezable_wq,
dwork, round_jiffies_relative(HZ));
}
/** * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure. * @shrinker: pointer to the balloon shrinker. * @sc: page reclaim information. * * Returns: number of pages that were freed during deflation.
*/ staticunsignedlong vmballoon_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{ struct vmballoon *b = &balloon; unsignedlong deflated_frames;
/* * Delay future inflation for some time to mitigate the situations in * which balloon continuously grows and shrinks. Use WRITE_ONCE() since * the access is asynchronous.
*/
WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY);
up_read(&b->conf_sem);
return deflated_frames;
}
/** * vmballoon_shrinker_count() - return the number of ballooned pages. * @shrinker: pointer to the balloon shrinker. * @sc: page reclaim information. * * Returns: number of 4k pages that are allocated for the balloon and can * therefore be reclaimed under pressure.
*/ staticunsignedlong vmballoon_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{ struct vmballoon *b = &balloon;
staticint vmballoon_register_shrinker(struct vmballoon *b)
{ /* Do nothing if the shrinker is not enabled */ if (!vmwballoon_shrinker_enable) return 0;
b->shrinker = shrinker_alloc(0, "vmw-balloon"); if (!b->shrinker) return -ENOMEM;
if (!b->stats) { /* allocation failed */
r = -ENOMEM; goto out;
}
static_key_enable(&balloon_stat_enabled.key);
out:
up_write(&b->conf_sem); return r;
}
/** * vmballoon_debug_show - shows statistics of balloon operations. * @f: pointer to the &struct seq_file. * @offset: ignored. * * Provides the statistics that can be accessed in vmmemctl in the debugfs. * To avoid the overhead - mainly that of memory - of collecting the statistics, * we only collect statistics after the first time the counters are read. * * Return: zero on success or an error code.
*/ staticint vmballoon_debug_show(struct seq_file *f, void *offset)
{ struct vmballoon *b = f->private; int i, j;
/* enables stats if they are disabled */ if (!b->stats) { int r = vmballoon_enable_stats(b);
#ifdef CONFIG_BALLOON_COMPACTION
/**
 * vmballoon_migratepage() - migrates a balloon page.
 * @b_dev_info: balloon device information descriptor.
 * @newpage: the page to which @page should be migrated.
 * @page: a ballooned page that should be migrated.
 * @mode: migration mode, ignored.
 *
 * This function is really open-coded, but that is according to the interface
 * that balloon_compaction provides.
 *
 * The old page is first unlocked (deflated) and only then is the new page
 * locked (inflated). If the inflation fails, the balloon size is decreased
 * by one instead of trying to undo the deflation.
 *
 * Return: zero once @page was deflated, -EAGAIN when @conf_sem could not be
 * taken (a configuration change is in progress), and -EBUSY if the
 * hypervisor did not unlock @page.
 */
static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
				 struct page *newpage, struct page *page,
				 enum migrate_mode mode)
{
	unsigned long status, flags;
	struct vmballoon *b;
	int ret = 0;

	b = container_of(b_dev_info, struct vmballoon, b_dev_info);

	/*
	 * If the semaphore is taken, there is ongoing configuration change
	 * (i.e., balloon reset), so try again.
	 */
	if (!down_read_trylock(&b->conf_sem))
		return -EAGAIN;

	spin_lock(&b->comm_lock);
	/*
	 * We must start by deflating and not inflating, as otherwise the
	 * hypervisor may tell us that it has enough memory and the new page is
	 * not needed. Since the old page is isolated, we cannot use the list
	 * interface to unlock it, as the LRU field is used for isolation.
	 * Instead, we use the native interface directly.
	 */
	vmballoon_add_page(b, 0, page);
	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
				   VMW_BALLOON_DEFLATE);

	if (status == VMW_BALLOON_SUCCESS)
		status = vmballoon_status_page(b, 0, &page);

	/*
	 * If a failure happened, let the migration mechanism know that it
	 * should not retry.
	 */
	if (status != VMW_BALLOON_SUCCESS) {
		spin_unlock(&b->comm_lock);
		ret = -EBUSY;
		goto out_unlock;
	}

	/*
	 * The page is isolated, so it is safe to delete it without holding
	 * @pages_lock . We keep holding @comm_lock since we will need it in a
	 * second.
	 */
	balloon_page_finalize(page);
	put_page(page);

	/*
	 * Inflate: hand the new page to the hypervisor. This must happen
	 * before the per-page status is read back, otherwise the status
	 * lookup would still describe the old page.
	 */
	vmballoon_add_page(b, 0, newpage);
	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
				   VMW_BALLOON_INFLATE);

	if (status == VMW_BALLOON_SUCCESS)
		status = vmballoon_status_page(b, 0, &newpage);

	spin_unlock(&b->comm_lock);

	if (status != VMW_BALLOON_SUCCESS) {
		/*
		 * A failure happened. While we can deflate the page we just
		 * inflated, this deflation can also encounter an error. Instead
		 * we will decrease the size of the balloon to reflect the
		 * change.
		 */
		atomic64_dec(&b->size);
	} else {
		/*
		 * Success. Take a reference for the page, and we will add it to
		 * the list after acquiring the lock.
		 */
		get_page(newpage);
	}

	/* Update the balloon list under the @pages_lock */
	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);

	/*
	 * On inflation success, we already took a reference for the @newpage.
	 * If we succeed just insert it to the list and update the statistics
	 * under the lock.
	 */
	if (status == VMW_BALLOON_SUCCESS) {
		balloon_page_insert(&b->b_dev_info, newpage);
		__count_vm_event(BALLOON_MIGRATE);
	}

	/*
	 * We deflated successfully, so regardless to the inflation success, we
	 * need to reduce the number of isolated_pages.
	 */
	b->b_dev_info.isolated_pages--;
	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);

out_unlock:
	up_read(&b->conf_sem);
	return ret;
}
/** * vmballoon_compaction_init() - initialized compaction for the balloon. * * @b: pointer to the balloon. * * If during the initialization a failure occurred, this function does not * perform cleanup. The caller must call vmballoon_compaction_deinit() in this * case. * * Return: zero on success or error code on failure.
*/ static __init void vmballoon_compaction_init(struct vmballoon *b)
{
b->b_dev_info.migratepage = vmballoon_migratepage;
}
error = vmballoon_register_shrinker(&balloon); if (error) return error;
/* * Initialization of compaction must be done after the call to * balloon_devinfo_init() .
*/
balloon_devinfo_init(&balloon.b_dev_info);
vmballoon_compaction_init(&balloon);
/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.