/* * The task created the shm object, for * task_lock(shp->shm_creator)
*/ struct task_struct *shm_creator;
/* * List by creator. task_lock(->shm_creator) required for read/write. * If list_empty(), then the creator is dead already.
*/ struct list_head shm_clist; struct ipc_namespace *ns;
} __randomize_layout;
/*
 * shm_mode upper byte flags.
 *
 * Each #define must sit on its own line: a second "#define" appearing later
 * on the same line is not a directive but part of the first macro's
 * replacement list.
 */
#define SHM_DEST	01000	/* segment will be destroyed on last detach */
#define SHM_LOCKED	02000	/* segment will not be swapped */
/* * Called with shm_ids.rwsem (writer) and the shp structure locked. * Only shm_ids.rwsem remains locked on exit.
*/ staticvoid do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{ struct shmid_kernel *shp;
if (shp->shm_nattch) {
shp->shm_perm.mode |= SHM_DEST; /* Do not find it any more */
ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
shm_unlock(shp);
} else
shm_destroy(ns, shp);
}
/* * shm_lock_(check_) routines are called in the paths where the rwsem * is not necessarily held.
*/ staticinlinestruct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{ struct kern_ipc_perm *ipcp;
rcu_read_lock();
ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); if (IS_ERR(ipcp)) goto err;
ipc_lock_object(ipcp); /* * ipc_rmid() may have already freed the ID while ipc_lock_object() * was spinning: here verify that the structure is still valid. * Upon races with RMID, return -EIDRM, thus indicating that * the ID points to a removed identifier.
*/ if (ipc_valid_object(ipcp)) { /* return a locked ipc object upon success */ return container_of(ipcp, struct shmid_kernel, shm_perm);
}
ipc_unlock_object(ipcp);
ipcp = ERR_PTR(-EIDRM);
err:
rcu_read_unlock(); /* * Callers of shm_lock() must validate the status of the returned ipc * object pointer and error out as appropriate.
*/ return ERR_CAST(ipcp);
}
/*
 * Unlink a segment from its creator's list of created segments.
 *
 * It has to be called with shp locked.
 * It must be called before ipc_rmid()
 */
static inline void shm_clist_rm(struct shmid_kernel *shp)
{
	struct task_struct *creator;

	/* ensure that shm_creator does not disappear */
	rcu_read_lock();

	/*
	 * A concurrent exit_shm may do a list_del_init() as well.
	 * Just do nothing if exit_shm already did the work
	 */
	if (!list_empty(&shp->shm_clist)) {
		/*
		 * shp->shm_creator is guaranteed to be valid *only*
		 * if shp->shm_clist is not empty.
		 */
		creator = shp->shm_creator;

		task_lock(creator);
		/*
		 * list_del_init() is a nop if the entry was already removed
		 * from the list.
		 */
		list_del_init(&shp->shm_clist);
		task_unlock(creator);
	}

	rcu_read_unlock();
}
/*
 * This is called by fork, once for every shm attach.
 *
 * Forwards to the open handler of the underlying vm_ops (when it has one)
 * and then runs __shm_open() on the attach data; a failure there is only
 * reported through a one-time warning.
 */
static void shm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	int err;

	/* Always call underlying open if present */
	if (sfd->vm_ops->open)
		sfd->vm_ops->open(vma);

	err = __shm_open(sfd);
	/*
	 * We raced in the idr lookup or with shm_destroy().
	 * Either way, the ID is busted.
	 */
	WARN_ON_ONCE(err);
}
/* * shm_destroy - free the struct shmid_kernel * * @ns: namespace * @shp: struct to free * * It has to be called with shp and shm_ids.rwsem (writer) locked, * but returns with shp unlocked and freed.
*/ staticvoid shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{ struct file *shm_file;
/* * shm_may_destroy - identifies whether shm segment should be destroyed now * * Returns true if and only if there are no active users of the segment and * one of the following is true: * * 1) shmctl(id, IPC_RMID, NULL) was called for this shp * * 2) sysctl kernel.shm_rmid_forced is set to 1.
*/ staticbool shm_may_destroy(struct shmid_kernel *shp)
{ return (shp->shm_nattch == 0) &&
(shp->ns->shm_rmid_forced ||
(shp->shm_perm.mode & SHM_DEST));
}
/*
 * Remove the attach descriptor vma.
 * Free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 *
 * NOTE(review): this block does not look intact. As written:
 *   - "staticvoid" is a single identifier, presumably "static void";
 *   - "goto done" targets a label that does not exist in this block;
 *   - the function is declared void yet executes "return 0" twice;
 *   - the rwsem taken by down_write() is not released on any path.
 * The statements from the list_empty() check onwards presumably belong to
 * a separate int-returning function and text was lost in between; confirm
 * against the upstream file before relying on any of this.
 */ staticvoid __shm_close(struct shm_file_data *sfd)
{ struct shmid_kernel *shp; struct ipc_namespace *ns = sfd->ns;
/* Take the ids rwsem as writer, then look up and lock the segment by id. */
down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */
shp = shm_lock(ns, sfd->id);
/*
 * We raced in the idr lookup or with shm_destroy().
 * Either way, the ID is busted.
 * shm_lock() hands back an ERR_PTR() on failure, hence the IS_ERR() check.
 */ if (WARN_ON_ONCE(IS_ERR(shp))) goto done; /* no-op */
/*
 * We want to destroy segments without users and with already
 * exit'ed originating process.
 *
 * As shp->* are changed under rwsem, it's safe to skip shp locking.
 *
 * A non-empty shm_clist means the segment is still linked on a creator's
 * list, so it is left alone.
 */ if (!list_empty(&shp->shm_clist)) return 0;
/*
 * NOTE(review): shp was already returned locked by shm_lock() above;
 * whether shm_lock_by_ptr() may be called on it again cannot be told
 * from here - verify.
 */
if (shm_may_destroy(shp)) {
shm_lock_by_ptr(shp);
shm_destroy(ns, shp);
} return 0;
}
/* Locking assumes this will only be called with task == current */ void exit_shm(struct task_struct *task)
{ for (;;) { struct shmid_kernel *shp; struct ipc_namespace *ns;
task_lock(task);
if (list_empty(&task->sysvshm.shm_clist)) {
task_unlock(task); break;
}
/* * 1) Get pointer to the ipc namespace. It is worth to say * that this pointer is guaranteed to be valid because * shp lifetime is always shorter than namespace lifetime * in which shp lives. * We taken task_lock it means that shp won't be freed.
*/
ns = shp->ns;
/* * 2) If kernel.shm_rmid_forced is not set then only keep track of * which shmids are orphaned, so that a later set of the sysctl * can clean them up.
*/ if (!ns->shm_rmid_forced) goto unlink_continue;
/* * 3) get a reference to the namespace. * The refcount could be already 0. If it is 0, then * the shm objects will be free by free_ipc_work().
*/
ns = get_ipc_ns_not_zero(ns); if (!ns) {
unlink_continue:
list_del_init(&shp->shm_clist);
task_unlock(task); continue;
}
/* * 4) get a reference to shp. * This cannot fail: shm_clist_rm() is called before * ipc_rmid(), thus the refcount cannot be 0.
*/
WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
/* * 5) unlink the shm segment from the list of segments * created by current. * This must be done last. After unlinking, * only the refcounts obtained above prevent IPC_RMID * from destroying the segment or the namespace.
*/
list_del_init(&shp->shm_clist);
task_unlock(task);
/* * 6) we have all references * Thus lock & if needed destroy shp.
*/
down_write(&shm_ids(ns).rwsem);
shm_lock_by_ptr(shp); /* * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's * safe to call ipc_rcu_putref here
*/
ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
if (ipc_valid_object(&shp->shm_perm)) { if (shm_may_destroy(shp))
shm_destroy(ns, shp); else
shm_unlock(shp);
} else { /* * Someone else deleted the shp from namespace * idr/kht while we have waited. * Just unlock and continue.
*/
shm_unlock(shp);
}
up_write(&shm_ids(ns).rwsem);
put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
}
}
/* * In case of remap_file_pages() emulation, the file can represent an * IPC ID that was removed, and possibly even reused by another shm * segment already. Propagate this case as an error to caller.
*/
ret = __shm_open(sfd); if (ret) return ret;
/* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them.
*/
file_inode(file)->i_ino = shp->shm_perm.id;
/* * Calculate and add used RSS and swap pages of a shm. * Called with shm_ids.rwsem held as a reader
*/ staticvoid shm_add_rss_swap(struct shmid_kernel *shp, unsignedlong *rss_add, unsignedlong *swp_add)
{ struct inode *inode;
/* * Called with shm_ids.rwsem held as a reader
*/ staticvoid shm_get_stat(struct ipc_namespace *ns, unsignedlong *rss, unsignedlong *swp)
{ int next_id; int total, in_use;
*rss = 0;
*swp = 0;
in_use = shm_ids(ns).in_use;
for (total = 0, next_id = 0; total < in_use; next_id++) { struct kern_ipc_perm *ipc; struct shmid_kernel *shp;
/* * This function handles some shmctl commands which require the rwsem * to be held in write mode. * NOTE: no locks must be held, the rwsem is taken inside this function.
*/ staticint shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, struct shmid64_ds *shmid64)
{ struct kern_ipc_perm *ipcp; struct shmid_kernel *shp; int err;
/* * Semantically SHM_STAT_ANY ought to be identical to * that functionality provided by the /proc/sysvipc/ * interface. As such, only audit these calls and * do not do traditional S_IRUGO permission checks on * the ipc object.
*/ if (cmd == SHM_STAT_ANY)
audit_ipc_obj(&shp->shm_perm); else {
err = -EACCES; if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) goto out_unlock;
}
err = security_shm_shmctl(&shp->shm_perm, cmd); if (err) goto out_unlock;
ipc_lock_object(&shp->shm_perm);
if (!ipc_valid_object(&shp->shm_perm)) {
ipc_unlock_object(&shp->shm_perm);
err = -EIDRM; goto out_unlock;
}
if (cmd == IPC_STAT) { /* * As defined in SUS: * Return 0 on success
*/
err = 0;
} else { /* * SHM_STAT and SHM_STAT_ANY (both Linux specific) * Return the full id, including the sequence number
*/
err = shp->shm_perm.id;
}
#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
{ int version = ipc_parse_version(&cmd);
#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr)
{ int version = compat_ipc_parse_version(&cmd);
/* * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. * * NOTE! Despite the name, this is NOT a direct system call entrypoint. The * "raddr" thing points to kernel space, and there has to be a wrapper around * this.
*/ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
ulong *raddr, unsignedlong shmlba)
{ struct shmid_kernel *shp; unsignedlong addr = (unsignedlong)shmaddr; unsignedlong size; struct file *file, *base; int err; unsignedlong flags = MAP_SHARED; unsignedlong prot; int acc_mode; struct ipc_namespace *ns; struct shm_file_data *sfd; int f_flags; unsignedlong populate = 0;
err = -EINVAL; if (shmid < 0) goto out;
if (addr) { if (addr & (shmlba - 1)) { if (shmflg & SHM_RND) {
addr &= ~(shmlba - 1); /* round down */
/* * Ensure that the round-down is non-nil * when remapping. This can happen for * cases when addr < shmlba.
*/ if (!addr && (shmflg & SHM_REMAP)) goto out;
} else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) #endif goto out;
}
/* * We cannot rely on the fs check since SYSV IPC does have an * additional creator id...
*/
ns = current->nsproxy->ipc_ns;
rcu_read_lock();
shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) {
err = PTR_ERR(shp); goto out_unlock;
}
err = -EACCES; if (ipcperms(ns, &shp->shm_perm, acc_mode)) goto out_unlock;
err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg); if (err) goto out_unlock;
ipc_lock_object(&shp->shm_perm);
/* check if shm_destroy() is tearing down shp */ if (!ipc_valid_object(&shp->shm_perm)) {
ipc_unlock_object(&shp->shm_perm);
err = -EIDRM; goto out_unlock;
}
/* * We need to take a reference to the real shm file to prevent the * pointer from becoming stale in cases where the lifetime of the outer * file extends beyond that of the shm segment. It's not usually * possible, but it can happen during remap_file_pages() emulation as * that unmaps the memory, then does ->mmap() via file reference only. * We'll deny the ->mmap() if the shm segment was since removed, but to * detect shm ID reuse we need to compare the file pointers.
*/
base = get_file(shp->shm_file);
shp->shm_nattch++;
size = i_size_read(file_inode(base));
ipc_unlock_object(&shp->shm_perm);
rcu_read_unlock();
/* * detach and kill segment if marked destroyed. * The work is done in shm_close.
*/ long ksys_shmdt(char __user *shmaddr)
{ struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsignedlong addr = (unsignedlong)shmaddr; int retval = -EINVAL; #ifdef CONFIG_MMU
loff_t size = 0; struct file *file;
VMA_ITERATOR(vmi, mm, addr); #endif
if (addr & ~PAGE_MASK) return retval;
if (mmap_write_lock_killable(mm)) return -EINTR;
/* * This function tries to be smart and unmap shm segments that * were modified by partial mlock or munmap calls: * - It first determines the size of the shm segment that should be * unmapped: It searches for a vma that is backed by shm and that * started at address shmaddr. It records it's size and then unmaps * it. * - Then it unmaps all shm vmas that started at shmaddr and that * are within the initially determined size and that are from the * same shm segment from which we determined the size. * Errors from do_munmap are ignored: the function only fails if * it's called with invalid parameters or if it's called to unmap * a part of a vma. Both calls in this function are for full vmas, * the parameters are directly copied from the vma itself and always * valid - therefore do_munmap cannot fail. (famous last words?)
*/ /* * If it had been mremap()'d, the starting address would not * match the usual checks anyway. So assume all vma's are * above the starting address given.
*/
#ifdef CONFIG_MMU
for_each_vma(vmi, vma) { /* * Check if the starting address would match, i.e. it's * a fragment created by mprotect() and/or munmap(), or it * otherwise it starts at this address with no hassles.
*/ if ((vma->vm_ops == &shm_vm_ops) &&
(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
/* * Record the file of the shm segment being * unmapped. With mremap(), someone could place * page from another segment but with equal offsets * in the range we are unmapping.
*/
file = vma->vm_file;
size = i_size_read(file_inode(vma->vm_file));
do_vmi_align_munmap(&vmi, vma, mm, vma->vm_start,
vma->vm_end, NULL, false); /* * We discovered the size of the shm segment, so * break out of here and fall through to the next * loop that uses the size information to stop * searching for matching vma's.
*/
retval = 0;
vma = vma_next(&vmi); break;
}
}
/* * We need look no further than the maximum address a fragment * could possibly have landed at. Also cast things to loff_t to * prevent overflows and make comparisons vs. equal-width types.
*/
size = PAGE_ALIGN(size); while (vma && (loff_t)(vma->vm_end - addr) <= size) { /* finding a matching vma now does not alter retval */ if ((vma->vm_ops == &shm_vm_ops) &&
((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
(vma->vm_file == file)) {
do_vmi_align_munmap(&vmi, vma, mm, vma->vm_start,
vma->vm_end, NULL, false);
}
vma = vma_next(&vmi);
}
#else/* CONFIG_MMU */
vma = vma_lookup(mm, addr); /* under NOMMU conditions, the exact address to be destroyed must be * given
*/ if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
retval = 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.