/* fill in kstatfs */
buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
/*
 * Express utilization in terms of large blocks to avoid
 * overflow on 32-bit machines.
 */
buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
/* * By default use root quota for stats; fallback to overall filesystem * usage if using 'noquotadf' mount option or if the root dir doesn't * have max_bytes quota set.
*/ if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
!ceph_quota_update_statfs(fsc, buf)) {
buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
}
/*
 * NOTE: for the time being, we make bsize == frsize to humor
 * not-yet-ancient versions of glibc that are broken.
 * Someday, we will probably want to report a real block
 * size... whatever that may mean for a network file system!
 */
buf->f_bsize = buf->f_frsize;
/* Must convert the fsid, for consistent values across arches */
buf->f_fsid.val[0] = 0;
mutex_lock(&monc->mutex); for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
mutex_unlock(&monc->mutex);
/* fold the fs_cluster_id into the upper bits */
buf->f_fsid.val[1] = monc->fs_cluster_id;
/* * Remove adjacent slashes and then the trailing slash, unless it is * the only remaining character. * * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
*/ staticvoid canonicalize_path(char *path)
{ int i, j = 0;
for (i = 0; path[i] != '\0'; i++) { if (path[i] != '/' || j < 1 || path[j - 1] != '/')
path[j++] = path[i];
}
++fsid_start; /* start of cluster fsid */
fs_name_start = strchr(fsid_start, '.'); if (!fs_name_start) return invalfc(fc, "missing file system name");
if (ceph_parse_fsid(fsid_start, &fsid)) return invalfc(fc, "Invalid FSID");
++fs_name_start; /* start of file system name */
len = dev_name_end - fs_name_start;
if (!namespace_equals(fsopt, fs_name_start, len)) return invalfc(fc, "Mismatching mds_namespace");
kfree(fsopt->mds_namespace);
fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); if (!fsopt->mds_namespace) return -ENOMEM;
dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
fsopt->new_dev_syntax = true; return 0;
}
/* * Parse the source parameter for new device format. Distinguish the device * spec from the path. Try parsing new device format and fallback to old * format if needed. * * New device syntax will looks like: * <device_spec>=/<path> * where * <device_spec> is name@fsid.fsname * <path> is optional, but if present must begin with '/' * (monitor addresses are passed via mount option) * * Old device syntax is: * <server_spec>[,<server_spec>...]:[<path>] * where * <server_spec> is <ip>[:<port>] * <path> is optional, but if present must begin with '/'
*/ staticint ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
{ struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct ceph_mount_options *fsopt = pctx->opts; char *dev_name = param->string, *dev_name_end; int ret;
dout("'%s'\n", dev_name); if (!dev_name || !*dev_name) return invalfc(fc, "Empty source");
dev_name_end = strchr(dev_name, '/'); if (dev_name_end) { /* * The server_path will include the whole chars from userland * including the leading '/'.
*/
kfree(fsopt->server_path);
fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); if (!fsopt->server_path) return -ENOMEM;
dev_name_end--; /* back up to separator */ if (dev_name_end < dev_name) return invalfc(fc, "Path missing in source");
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); if (fsopt->server_path)
dout("server path '%s'\n", fsopt->server_path);
dout("trying new device syntax");
ret = ceph_parse_new_source(dev_name, dev_name_end, fc); if (ret) { if (ret != -EINVAL) return ret;
dout("trying old device syntax");
ret = ceph_parse_old_source(dev_name, dev_name_end, fc); if (ret) return ret;
}
if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
seq_puts(m, ",copyfrom");
/* dump mds_namespace when old device syntax is in use */ if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
if (fsopt->mon_addr)
seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
seq_show_option(m, "recover_session", "clean");
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
seq_puts(m, ",wsync"); if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
seq_puts(m, ",nopagecache"); if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
seq_puts(m, ",sparseread");
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize); if (fsopt->rsize != CEPH_MAX_READ_SIZE)
seq_printf(m, ",rsize=%u", fsopt->rsize); if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
seq_printf(m, ",rasize=%u", fsopt->rasize); if (fsopt->congestion_kb != default_congestion_kb())
seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); if (fsopt->caps_max)
seq_printf(m, ",caps_max=%d", fsopt->caps_max); if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
seq_printf(m, ",caps_wanted_delay_min=%u",
fsopt->caps_wanted_delay_min); if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
seq_printf(m, ",caps_wanted_delay_max=%u",
fsopt->caps_wanted_delay_max); if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
seq_show_option(m, "snapdirname", fsopt->snapdir_name);
return 0;
}
/*
 * Handle any mon messages the standard library doesn't understand.
 * Return error if we don't either.
 *
 * @client: ceph client whose ->private points at the owning ceph_fs_client
 * @msg:    incoming message; its header type (little-endian on the wire)
 *          selects the handler
 *
 * Returns 0 if the message type was dispatched to the MDS client here,
 * or -1 if the type is not one this function recognizes.
 */
static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
{
	struct ceph_fs_client *fsc = client->private;
	int type = le16_to_cpu(msg->hdr.type);

	switch (type) {
	case CEPH_MSG_MDS_MAP:
		ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
		return 0;
	case CEPH_MSG_FS_MAP_USER:
		ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
		return 0;
	default:
		/* not ours either; let the caller know it went unhandled */
		return -1;
	}
}
/* * create a new fs client * * Success or not, this function consumes @fsopt and @opt.
*/ staticstruct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_options *opt)
{ struct ceph_fs_client *fsc; int err;
err = -ENOMEM; /* * The number of concurrent works can be high but they don't need * to be processed in parallel, limit concurrency.
*/
fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); if (!fsc->inode_wq) goto fail_client;
fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); if (!fsc->cap_wq) goto fail_inode_wq;
/* * ceph_umount_begin - initiate forced umount. Tear down the * mount, skipping steps that may hang while waiting for server(s).
*/ void ceph_umount_begin(struct super_block *sb)
{ struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
/* * Bootstrap mount by opening the root directory. Note the mount * @started time from caller, and time out if this takes too long.
*/ staticstruct dentry *open_root_dentry(struct ceph_fs_client *fsc, constchar *path, unsignedlong started)
{ struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req = NULL; int err; struct dentry *root;
/* open dir */
doutc(cl, "opening '%s'\n", path);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) return ERR_CAST(req);
req->r_path1 = kstrdup(path, GFP_NOFS); if (!req->r_path1) {
root = ERR_PTR(-ENOMEM); goto out;
}
if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy)) return 0;
/* No changing encryption context on remount. */ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
!fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) { if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
&fsc->fsc_dummy_enc_policy)) return 0;
errorfc(fc, "Can't set test_dummy_encryption on remount"); return -EINVAL;
}
/* Also make sure fsopt doesn't contain a conflicting value. */ if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) { if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
&fsc->fsc_dummy_enc_policy)) return 0;
errorfc(fc, "Conflicting test_dummy_encryption options"); return -EINVAL;
}
out_splat: if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
pr_info("No mds server is up or the cluster is laggy\n");
err = -EHOSTUNREACH;
}
err = ceph_apply_test_dummy_encryption(sb, fc, fsopt); if (err) return err;
if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
ceph_set_mount_opt(fsc, ASYNC_DIROPS); else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
ceph_set_mount_opt(fsc, SPARSEREAD); else
ceph_clear_mount_opt(fsc, SPARSEREAD);
if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
kfree(fsc->mount_options->mon_addr);
fsc->mount_options->mon_addr = fsopt->mon_addr;
fsopt->mon_addr = NULL;
pr_notice_client(fsc->client, "monitor addresses recorded, but not used for reconnection");
}
/* * Set up the filesystem mount context.
*/ staticint ceph_init_fs_context(struct fs_context *fc)
{ struct ceph_parse_opts_ctx *pctx; struct ceph_mount_options *fsopt;
pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); if (!pctx) return -ENOMEM;
pctx->copts = ceph_alloc_options(); if (!pctx->copts) goto nomem;
pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); if (!pctx->opts) goto nomem;
/*
 * Return true if it successfully increases the blocker counter,
 * or false if the mdsc is in stopping and flushed state.
 *
 * The stopping stage is checked and the counter bumped while holding
 * mdsc->stopping_lock, so the check and the increment are not separated.
 */
static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
{
	spin_lock(&mdsc->stopping_lock);
	if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
		/* already at (or past) the flushing stage: refuse new blockers */
		spin_unlock(&mdsc->stopping_lock);
		return false;
	}
	atomic_inc(&mdsc->stopping_blockers);
	spin_unlock(&mdsc->stopping_lock);
	return true;
}
/*
 * Though the kill_anon_super() will finally trigger the
 * sync_filesystem() anyway, we still need to do it here and
 * then bump the stage of shutdown. This will allow us to
 * drop any further messages from MDSs, which would increase
 * the inodes' i_count reference counters but make no sense
 * any more.
 *
 * Without this, evicting the inodes may fail in
 * kill_anon_super(), which will trigger a warning when
 * destroying the fscrypt keyring and then possibly trigger
 * a further crash in the ceph module when the iput() tries to
 * evict the inodes later.
 */
sync_filesystem(s);
if (atomic64_read(&mdsc->dirty_folios) > 0) {
wait_queue_head_t *wq = &mdsc->flush_end_wq; long timeleft = wait_event_killable_timeout(*wq,
atomic64_read(&mdsc->dirty_folios) <= 0,
fsc->client->options->mount_timeout); if (!timeleft) /* timed out */
pr_warn_client(cl, "umount timed out, %ld\n", timeleft); elseif (timeleft < 0) /* killed */
pr_warn_client(cl, "umount was killed, %ld\n", timeleft);
}
ret = param_set_bool(val, kp); if (ret) {
pr_err("Failed to parse sending metrics switch value '%s'\n",
val); return ret;
} elseif (!disable_send_metrics) { // wake up all the mds clients
spin_lock(&ceph_fsc_lock);
list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) {
metric_schedule_delayed(&fsc->mdsc->metric);
}
spin_unlock(&ceph_fsc_lock);
}
/* for both v1 and v2 syntax */ staticbool mount_support = true; staticconststruct kernel_param_ops param_ops_mount_syntax = {
.get = param_get_bool,
};
module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444);
module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444);
/*
 * Module parameter (mode 0644), false by default. Per its description
 * string below, it allows idmapped mounts to be used with an MDS that
 * lacks CEPHFS_FEATURE_HAS_OWNER_UIDGID.
 */
bool enable_unsafe_idmap = false;
module_param(enable_unsafe_idmap, bool, 0644);
MODULE_PARM_DESC(enable_unsafe_idmap, "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID");
/* Register the module's init/exit entry points and its metadata. */
module_init(init_ceph);
module_exit(exit_ceph);
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.16 Sekunden
(vorverarbeitet am 2026-04-25)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.