/* * memfd_create system call and file sealing support * * Code was originally included in shmem.c, and broken out to facilitate * use by hugetlbfs as well as tmpfs. * * This file is released under the GPL.
*/
/* * We need a tag: a new tag would expand every xa_node by 8 bytes, * so reuse a tag which we firmly believe is never set or cleared on tmpfs * or hugetlbfs because they are memory only filesystems.
*/ #define MEMFD_TAG_PINNED PAGECACHE_TAG_TOWRITE #define LAST_SCAN 4 /* about 150ms max */
/* * This is a helper function used by memfd_pin_user_pages() in GUP (gup.c). * It is mainly called to allocate a folio in a memfd when the caller * (memfd_pin_folios()) cannot find a folio in the page cache at a given * index in the mapping.
*/ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
{ #ifdef CONFIG_HUGETLB_PAGE struct folio *folio;
gfp_t gfp_mask;
if (is_file_hugepages(memfd)) { /* * The folio would most likely be accessed by a DMA driver, * therefore, we have zone memory constraints where we can * alloc from. Also, the folio will be pinned for an indefinite * amount of time, so it is not expected to be migrated away.
*/ struct inode *inode = file_inode(memfd); struct hstate *h = hstate_file(memfd); int err = -ENOMEM; long nr_resv;
/* * Setting SEAL_WRITE requires us to verify there's no pending writer. However, * via get_user_pages(), drivers might have some pending I/O without any active * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all folios * and see whether it has an elevated ref-count. If so, we tag them and wait for * them to be dropped. * The caller must guarantee that no new user will acquire writable references * to those folios to avoid races.
*/ staticint memfd_wait_for_pins(struct address_space *mapping)
{
XA_STATE(xas, &mapping->i_pages, 0); struct folio *folio; int error, scan;
memfd_tag_pins(&xas);
error = 0; for (scan = 0; scan <= LAST_SCAN; scan++) { int latency = 0;
if (!xa_is_value(folio) &&
memfd_folio_has_extra_refs(folio)) { /* * On the last scan, we clean up all those tags * we inserted; but make a note that we still * found folios pinned.
*/ if (scan == LAST_SCAN)
error = -EBUSY; else
clear = false;
} if (clear)
xas_clear_mark(&xas, MEMFD_TAG_PINNED);
if (++latency < XA_CHECK_SCHED) continue;
latency = 0;
/* * SEALING * Sealing allows multiple parties to share a tmpfs or hugetlbfs file * but restrict access to a specific subset of file operations. Seals * can only be added, but never removed. This way, mutually untrusted * parties can share common memory regions with a well-defined policy. * A malicious peer can thus never perform unwanted operations on a * shared object. * * Seals are only supported on special tmpfs or hugetlbfs files and * always affect the whole underlying inode. Once a seal is set, it * may prevent some kinds of access to the file. Currently, the * following seals are defined: * SEAL_SEAL: Prevent further seals from being set on this file * SEAL_SHRINK: Prevent the file from shrinking * SEAL_GROW: Prevent the file from growing * SEAL_WRITE: Prevent write access to the file * SEAL_EXEC: Prevent modification of the exec bits in the file mode * * As we don't require any trust relationship between two parties, we * must prevent seals from being removed. Therefore, sealing a file * only adds a given set of seals to the file, it never touches * existing seals. Furthermore, the "setting seals"-operation can be * sealed itself, which basically prevents any further seal from being * added. * * Semantics of sealing are only defined on volatile files. Only * anonymous tmpfs and hugetlbfs files support sealing. More * importantly, seals are never written to disk. Therefore, there's * no plan to support it on other file types.
*/
if (!(file->f_mode & FMODE_WRITE)) return -EPERM; if (seals & ~(unsignedint)F_ALL_SEALS) return -EINVAL;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.