/*
 * Initialize to an unsupported value. Assigning -1 to an unsigned int
 * wraps to the largest representable value, which serves as the
 * "not set" marker until a real order is configured.
 */
unsigned int page_reporting_order = -1;
staticint page_order_update_notify(constchar *val, conststruct kernel_param *kp)
{ /* * If param is set beyond this limit, order is set to default * pageblock_order value
*/ return param_set_uint_minmax(val, kp, 0, MAX_PAGE_ORDER);
}
staticconststruct kernel_param_ops page_reporting_param_ops = {
.set = &page_order_update_notify, /* * For the get op, use param_get_int instead of param_get_uint. * This is to make sure that when unset the initialized value of * -1 is shown correctly
*/
.get = ¶m_get_int,
};
/*
 * This symbol is also a kernel parameter. Export the page_reporting_order
 * symbol so that other drivers can access it to control order values without
 * having to introduce another configurable parameter. Only one driver can
 * register with the page_reporting driver for the service, so we have just
 * one control parameter for the use case (which can be accessed in both
 * drivers).
 */
EXPORT_SYMBOL_GPL(page_reporting_order);
/* Check to see if we are in desired state */
state = atomic_read(&prdev->state); if (state == PAGE_REPORTING_REQUESTED) return;
/* * If reporting is already active there is nothing we need to do. * Test against 0 as that represents PAGE_REPORTING_IDLE.
*/
state = atomic_xchg(&prdev->state, PAGE_REPORTING_REQUESTED); if (state != PAGE_REPORTING_IDLE) return;
/* * Delay the start of work to allow a sizable queue to build. For * now we are limiting this to running no more than once every * couple of seconds.
*/
schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
}
/* * We use RCU to protect the pr_dev_info pointer. In almost all * cases this should be present, however in the unlikely case of * a shutdown this will be NULL and we should exit.
*/
rcu_read_lock();
prdev = rcu_dereference(pr_dev_info); if (likely(prdev))
__page_reporting_request(prdev);
/* * Drain the now reported pages back into their respective * free lists/areas. We assume at least one page is populated.
*/ do { struct page *page = sg_page(sg); int mt = get_pageblock_migratetype(page); unsignedint order = get_order(sg->length);
__putback_isolated_page(page, order, mt);
/* If the pages were not reported due to error skip flagging */ if (!reported) continue;
/*
 * If page was not commingled with another page we can
 * consider the result to be "reported" since the page
 * hasn't been modified, otherwise we will need to
 * report on the new larger page when we make our way
 * up to that higher order.
*/ if (PageBuddy(page) && buddy_order(page) == order)
__SetPageReported(page);
} while ((sg = sg_next(sg)));
/* reinitialize scatterlist now that it is empty */
sg_init_table(sgl, nents);
}
/* * The page reporting cycle consists of 4 stages, fill, report, drain, and * idle. We will cycle through the first 3 stages until we cannot obtain a * full scatterlist of pages, in that case we will switch to idle.
*/ staticint
page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone, unsignedint order, unsignedint mt, struct scatterlist *sgl, unsignedint *offset)
{ struct free_area *area = &zone->free_area[order]; struct list_head *list = &area->free_list[mt]; unsignedint page_len = PAGE_SIZE << order; struct page *page, *next; long budget; int err = 0;
/* * Perform early check, if free area is empty there is * nothing to process so we can skip this free_list.
*/ if (list_empty(list)) return err;
spin_lock_irq(&zone->lock);
/* * Limit how many calls we will be making to the page reporting * device for this list. By doing this we avoid processing any * given list for too long. * * The current value used allows us enough calls to process over a * sixteenth of the current list plus one additional call to handle * any pages that may have already been present from the previous * list processed. This should result in us reporting all pages on * an idle system in about 30 seconds. * * The division here should be cheap since PAGE_REPORTING_CAPACITY * should always be a power of 2.
*/
budget = DIV_ROUND_UP(area->nr_free, PAGE_REPORTING_CAPACITY * 16);
/* loop through free list adding unreported pages to sg list */
list_for_each_entry_safe(page, next, list, lru) { /* We are going to skip over the reported pages. */ if (PageReported(page)) continue;
/* * If we fully consumed our budget then update our * state to indicate that we are requesting additional * processing and exit this list.
*/ if (budget < 0) {
atomic_set(&prdev->state, PAGE_REPORTING_REQUESTED);
next = page; break;
}
/* Attempt to pull page from list and place in scatterlist */ if (*offset) { if (!__isolate_free_page(page, order)) {
next = page; break;
}
/* Add page to scatter list */
--(*offset);
sg_set_page(&sgl[*offset], page, page_len, 0);
continue;
}
/* * Make the first non-reported page in the free list * the new head of the free list before we release the * zone lock.
*/ if (!list_is_first(&page->lru, list))
list_rotate_to_front(&page->lru, list);
/* release lock before waiting on report processing */
spin_unlock_irq(&zone->lock);
/* begin processing pages in local list */
err = prdev->report(prdev, sgl, PAGE_REPORTING_CAPACITY);
/* reset offset since the full list was reported */
*offset = PAGE_REPORTING_CAPACITY;
/* update budget to reflect call to report function */
budget--;
/* reacquire zone lock and resume processing */
spin_lock_irq(&zone->lock);
/* flush reported pages from the sg list */
page_reporting_drain(prdev, sgl, PAGE_REPORTING_CAPACITY, !err);
/* * Reset next to first entry, the old next isn't valid * since we dropped the lock to report the pages
*/
next = list_first_entry(list, struct page, lru);
/* exit on error */ if (err) break;
}
/* Rotate any leftover pages to the head of the freelist */ if (!list_entry_is_head(next, list, lru) && !list_is_first(&next->lru, list))
list_rotate_to_front(&next->lru, list);
/* Generate minimum watermark to be able to guarantee progress */
watermark = low_wmark_pages(zone) +
(PAGE_REPORTING_CAPACITY << page_reporting_order);
/* * Cancel request if insufficient free memory or if we failed * to allocate page reporting statistics for the zone.
*/ if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) return err;
/* Process each free list starting from lowest order/mt */ for (order = page_reporting_order; order < NR_PAGE_ORDERS; order++) { for (mt = 0; mt < MIGRATE_TYPES; mt++) { /* We do not pull pages from the isolate free list */ if (is_migrate_isolate(mt)) continue;
/* report the leftover pages before going idle */
leftover = PAGE_REPORTING_CAPACITY - offset; if (leftover) {
sgl = &sgl[offset];
err = prdev->report(prdev, sgl, leftover);
/* flush any remaining pages out from the last report */
spin_lock_irq(&zone->lock);
page_reporting_drain(prdev, sgl, leftover, !err);
spin_unlock_irq(&zone->lock);
}
/*
 * Change the state to "Active" so that we can track whether anyone
 * requests page reporting after we complete our pass. If the state
 * is not altered by the end of the pass we will switch to idle and
 * quit scheduling reporting runs.
*/
atomic_set(&prdev->state, state);
/* allocate scatterlist to store pages being reported on */
sgl = kmalloc_array(PAGE_REPORTING_CAPACITY, sizeof(*sgl), GFP_KERNEL); if (!sgl) goto err_out;
kfree(sgl);
err_out: /* * If the state has reverted back to requested then there may be * additional pages to be processed. We will defer for 2s to allow * more pages to accumulate.
*/
state = atomic_cmpxchg(&prdev->state, state, PAGE_REPORTING_IDLE); if (state == PAGE_REPORTING_REQUESTED)
schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
}
int page_reporting_register(struct page_reporting_dev_info *prdev)
{ int err = 0;
mutex_lock(&page_reporting_mutex);
/* nothing to do if already in use */ if (rcu_dereference_protected(pr_dev_info,
lockdep_is_held(&page_reporting_mutex))) {
err = -EBUSY; goto err_out;
}
/* * If the page_reporting_order value is not set, we check if * an order is provided from the driver that is performing the * registration. If that is not provided either, we default to * pageblock_order.
*/
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.