// SPDX-License-Identifier: GPL-2.0-only /* * stack_user.c * * Code which interfaces ocfs2 with fs/dlm and a userspace stack. * * Copyright (C) 2007 Oracle. All rights reserved.
*/
/* * The control protocol starts with a handshake. Until the handshake * is complete, the control device will fail all write(2)s. * * The handshake is simple. First, the client reads until EOF. Each line * of output is a supported protocol tag. All protocol tags are a single * character followed by a two hex digit version number. Currently the * only things supported is T01, for "Text-base version 0x01". Next, the * client writes the version they would like to use, including the newline. * Thus, the protocol tag is 'T01\n'. If the version tag written is * unknown, -EINVAL is returned. Once the negotiation is complete, the * client can start sending messages. * * The T01 protocol has three messages. First is the "SETN" message. * It has the following syntax: * * SETN<space><8-char-hex-nodenum><newline> * * This is 14 characters. * * The "SETN" message must be the first message following the protocol. * It tells ocfs2_control the local node number. * * Next comes the "SETV" message. It has the following syntax: * * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> * * This is 11 characters. * * The "SETV" message sets the filesystem locking protocol version as * negotiated by the client. The client negotiates based on the maximum * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major * number from the "SETV" message must match * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number * must be less than or equal to ...sp_max_version.pv_minor. * * Once this information has been set, mounts will be allowed. From this * point on, the "DOWN" message can be sent for node down notification. * It has the following syntax: * * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> * * eg: * * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n * * This is 47 characters.
*/
/* * Whether or not the client has done the handshake. * For now, we have just one protocol version.
*/ #define OCFS2_CONTROL_PROTO "T01\n" #define OCFS2_CONTROL_PROTO_LEN 4
/* * ocfs2_live_connection is refcounted because the filesystem and * miscdevice sides can detach in different order. Let's just be safe.
*/ struct ocfs2_live_connection { struct list_head oc_list; struct ocfs2_cluster_connection *oc_conn; enum ocfs2_connection_type oc_type;
atomic_t oc_this_node; int oc_our_slot; struct dlm_lksb oc_version_lksb; char oc_lvb[DLM_LVB_LEN]; struct completion oc_sync_wait;
wait_queue_head_t oc_wait;
};
struct ocfs2_control_private { struct list_head op_list; int op_state; int op_this_node; struct ocfs2_protocol_version op_proto;
};
/* * ocfs2_live_connection structures are created underneath the ocfs2 * mount path. Since the VFS prevents multiple calls to * fill_super(), we can't get dupes here.
*/ staticint ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn, struct ocfs2_live_connection *c)
{ int rc = 0;
staticvoid ocfs2_control_send_down(constchar *uuid, int nodenum)
{ struct ocfs2_live_connection *c;
mutex_lock(&ocfs2_control_lock);
c = ocfs2_connection_find(uuid); if (c) {
BUG_ON(c->oc_conn == NULL);
c->oc_conn->cc_recovery_handler(nodenum,
c->oc_conn->cc_recovery_data);
}
mutex_unlock(&ocfs2_control_lock);
}
/* * Called whenever configuration elements are sent to /dev/ocfs2_control. * If all configuration elements are present, try to set the global * values. If there is a problem, return an error. Skip any missing * elements, and only bump ocfs2_control_opened when we have all elements * and are successful.
*/ staticint ocfs2_control_install_private(struct file *file)
{ int rc = 0; int set_p = 1; struct ocfs2_control_private *p = file->private_data;
if (!rc && set_p) { /* We set the global values successfully */
atomic_inc(&ocfs2_control_opened);
ocfs2_control_set_handshake_state(file,
OCFS2_CONTROL_HANDSHAKE_VALID);
}
return rc;
}
staticint ocfs2_control_get_this_node(void)
{ int rc;
if (kstrtol(msg->major, 16, &major)) return -EINVAL; if (kstrtol(msg->minor, 16, &minor)) return -EINVAL;
/* * The major must be between 1 and 255, inclusive. The minor * must be between 0 and 255, inclusive. The version passed in * must be within the maximum version supported by the filesystem.
*/ if ((major == LONG_MIN) || (major == LONG_MAX) ||
(major > (u8)-1) || (major < 1)) return -ERANGE; if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
(minor > (u8)-1) || (minor < 0)) return -ERANGE; if ((major != max->pv_major) ||
(minor > max->pv_minor)) return -EINVAL;
switch (ocfs2_control_get_handshake_state(file)) { case OCFS2_CONTROL_HANDSHAKE_INVALID:
ret = -EINVAL; break;
case OCFS2_CONTROL_HANDSHAKE_READ:
ret = ocfs2_control_validate_protocol(file, buf,
count); break;
case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: case OCFS2_CONTROL_HANDSHAKE_VALID:
ret = ocfs2_control_message(file, buf, count); break;
default:
BUG();
ret = -EIO; break;
}
return ret;
}
/* * This is a naive version. If we ever have a new protocol, we'll expand * it. Probably using seq_file.
*/ static ssize_t ocfs2_control_read(struct file *file, char __user *buf,
size_t count,
loff_t *ppos)
{
ssize_t ret;
ret = simple_read_from_buffer(buf, count, ppos,
OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN);
/* Have we read the whole protocol list? */ if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN)
ocfs2_control_set_handshake_state(file,
OCFS2_CONTROL_HANDSHAKE_READ);
if (ocfs2_control_get_handshake_state(file) !=
OCFS2_CONTROL_HANDSHAKE_VALID) goto out;
if (atomic_dec_and_test(&ocfs2_control_opened)) { if (!list_empty(&ocfs2_live_connection_list)) { /* XXX: Do bad things! */
printk(KERN_ERR "ocfs2: Unexpected release of ocfs2_control!\n" " Loss of cluster connection requires " "an emergency restart!\n");
emergency_restart();
} /* * Last valid close clears the node number and resets * the locking protocol version
*/
ocfs2_control_this_node = -1;
running_proto.pv_major = 0;
running_proto.pv_minor = 0;
}
staticvoid fsdlm_lock_ast_wrapper(void *astarg)
{ struct ocfs2_dlm_lksb *lksb = astarg; int status = lksb->lksb_fsdlm.sb_status;
/* * For now we're punting on the issue of other non-standard errors * where we can't tell if the unlock_ast or lock_ast should be called. * The main "other error" that's possible is EINVAL which means the * function was called with invalid args, which shouldn't be possible * since the caller here is under our control. Other non-standard * errors probably fall into the same category, or otherwise are fatal * which means we can't carry on anyway.
*/
if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0); else
lksb->lksb_conn->cc_proto->lp_lock_ast(lksb);
}
staticint user_plock(struct ocfs2_cluster_connection *conn,
u64 ino, struct file *file, int cmd, struct file_lock *fl)
{ /* * This more or less just demuxes the plock request into any * one of three dlm calls. * * Internally, fs/dlm will pass these to a misc device, which * a userspace daemon will read and write to.
*/
/* * Compare a requested locking protocol version against the current one. * * If the major numbers are different, they are incompatible. * If the current minor is greater than the request, they are incompatible. * If the current minor is less than or equal to the request, they are * compatible, and the requester should run at the current minor version.
*/ staticint fs_protocol_compare(struct ocfs2_protocol_version *existing, struct ocfs2_protocol_version *request)
{ if (existing->pv_major != request->pv_major) return 1;
if (existing->pv_minor > request->pv_minor) return 1;
if (existing->pv_minor < request->pv_minor)
request->pv_minor = existing->pv_minor;
return 0;
}
staticvoid lvb_to_version(char *lvb, struct ocfs2_protocol_version *ver)
{ struct ocfs2_protocol_version *pv =
(struct ocfs2_protocol_version *)lvb; /* * ocfs2_protocol_version has two u8 variables, so we don't * need any endian conversion.
*/
ver->pv_major = pv->pv_major;
ver->pv_minor = pv->pv_minor;
}
staticvoid version_to_lvb(struct ocfs2_protocol_version *ver, char *lvb)
{ struct ocfs2_protocol_version *pv =
(struct ocfs2_protocol_version *)lvb; /* * ocfs2_protocol_version has two u8 variables, so we don't * need any endian conversion.
*/
pv->pv_major = ver->pv_major;
pv->pv_minor = ver->pv_minor;
}
/* get_protocol_version() * * To exchange ocfs2 versioning, we use the LVB of the version dlm lock. * The algorithm is: * 1. Attempt to take the lock in EX mode (non-blocking). * 2. If successful (which means it is the first mount), write the * version number and downconvert to PR lock. * 3. If unsuccessful (returns -EAGAIN), read the version from the LVB after * taking the PR lock.
*/
rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name,
DLM_LSFL_NEWEXCL, DLM_LVB_LEN,
&ocfs2_ls_ops, conn, &ops_rv, &fsdlm); if (rc) { if (rc == -EEXIST || rc == -EPROTO)
printk(KERN_ERR "ocfs2: Unable to create the " "lockspace %s (%d), because a ocfs2-tools " "program is running on this file system " "with the same name lockspace\n",
conn->cc_name, rc); goto out;
}
if (ops_rv == -EOPNOTSUPP) {
lc->oc_type = WITH_CONTROLD;
printk(KERN_NOTICE "ocfs2: You seem to be using an older " "version of dlm_controld and/or ocfs2-tools." " Please consider upgrading.\n");
} elseif (ops_rv) {
rc = ops_rv; goto out;
}
conn->cc_lockspace = fsdlm;
rc = ocfs2_live_connection_attach(conn, lc); if (rc) goto out;
if (lc->oc_type == NO_CONTROLD) {
rc = get_protocol_version(conn); if (rc) {
printk(KERN_ERR "ocfs2: Could not determine" " locking version\n");
user_cluster_disconnect(conn);
lc = NULL; goto out;
}
wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
}
/* * running_proto must have been set before we allowed any mounts * to proceed.
*/ if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
printk(KERN_ERR "Unable to mount with fs locking protocol version " "%u.%u because negotiated protocol is %u.%u\n",
conn->cc_version.pv_major, conn->cc_version.pv_minor,
running_proto.pv_major, running_proto.pv_minor);
rc = -EPROTO;
ocfs2_live_connection_drop(lc);
lc = NULL;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.