msg.msg_iter = *it; while (iov_iter_count(it)) {
ret = sock_recvmsg(sock, &msg, msg.msg_flags); if (ret <= 0) { if (ret == -EAGAIN)
ret = 0; return ret;
}
iov_iter_advance(it, ret);
}
WARN_ON(msg_data_left(&msg)); return 1;
}
/*
 * Read as much as possible into con->v2.in_iter from con->sock.
 *
 * This is a thin, logging wrapper around do_recvmsg(): the state of
 * the iterator is traced before and after the call and the result of
 * do_recvmsg() is passed through unchanged.
 *
 * Return:
 *   1 - done, nothing (else) to read
 *   0 - socket is empty, need to wait
 *  <0 - error
 */
static int ceph_tcp_recv(struct ceph_connection *con)
{
	int ret;

	dout("%s con %p %s %zu\n", __func__, con,
	     iov_iter_is_discard(&con->v2.in_iter) ? "discard" : "need",
	     iov_iter_count(&con->v2.in_iter));

	ret = do_recvmsg(con->sock, &con->v2.in_iter);

	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
	     iov_iter_count(&con->v2.in_iter));
	return ret;
}
if (WARN_ON(!iov_iter_is_bvec(it))) return -EINVAL;
while (iov_iter_count(it)) { /* iov_iter_iovec() for ITER_BVEC */
bvec_set_page(&bv, it->bvec->bv_page,
min(iov_iter_count(it),
it->bvec->bv_len - it->iov_offset),
it->bvec->bv_offset + it->iov_offset);
/* * MSG_SPLICE_PAGES cannot properly handle pages with * page_count == 0, we need to fall back to sendmsg if * that's the case. * * Same goes for slab pages: skb_can_coalesce() allows * coalescing neighboring slab objects into a single frag * which triggers one of hardened usercopy checks.
*/ if (sendpage_ok(bv.bv_page))
msg.msg_flags |= MSG_SPLICE_PAGES; else
msg.msg_flags &= ~MSG_SPLICE_PAGES;
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len);
ret = sock_sendmsg(sock, &msg); if (ret <= 0) { if (ret == -EAGAIN)
ret = 0; return ret;
}
iov_iter_advance(it, ret);
}
return 1;
}
/*
 * Write as much as possible from con->v2.out_iter to con->sock.  The
 * socket is expected to be corked, so we don't bother with MSG_MORE
 * here.
 *
 * con->v2.out_iter_sendpage selects the transmit helper:
 * do_try_sendpage() when set, do_sendmsg() otherwise.  The helper's
 * result is passed through unchanged.
 *
 * Return:
 *   1 - done, nothing (else) to write
 *   0 - socket is full, need to wait
 *  <0 - error
 */
static int ceph_tcp_send(struct ceph_connection *con)
{
	int ret;

	dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
	     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);

	if (con->v2.out_iter_sendpage)
		ret = do_try_sendpage(con->sock, &con->v2.out_iter);
	else
		ret = do_sendmsg(con->sock, &con->v2.out_iter);

	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
	     iov_iter_count(&con->v2.out_iter));
	return ret;
}
/*
 * Discards trailing empty segments, unless there is just one segment.
 * A frame always has at least one (possibly empty) segment.
 *
 * @lens:    per-segment lengths
 * @len_cnt: number of entries in @lens
 *
 * Return: index of the last non-zero entry in @lens plus one, or 1 if
 * every entry is zero (or @len_cnt is not positive).
 */
static int calc_segment_count(const int *lens, int len_cnt)
{
	int i;

	/* scan from the end: the first non-empty segment found is the last one */
	for (i = len_cnt - 1; i >= 0; i--) {
		if (lens[i])
			return i + 1;
	}

	return 1;
}
staticvoid init_frame_desc(struct ceph_frame_desc *desc, int tag, constint *lens, int len_cnt)
{ int i;
memset(desc, 0, sizeof(*desc));
desc->fd_tag = tag;
desc->fd_seg_cnt = calc_segment_count(lens, len_cnt);
BUG_ON(desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT); for (i = 0; i < desc->fd_seg_cnt; i++) {
desc->fd_lens[i] = lens[i];
desc->fd_aligns[i] = frame_aligns[i];
}
}
/* * Preamble crc covers everything up to itself (28 bytes) and * is calculated and verified irrespective of the connection mode * (i.e. even if the frame is encrypted).
*/ staticvoid encode_preamble(conststruct ceph_frame_desc *desc, void *p)
{ void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN; void *start = p; int i;
memset(p, 0, CEPH_PREAMBLE_LEN);
ceph_encode_8(&p, desc->fd_tag);
ceph_encode_8(&p, desc->fd_seg_cnt); for (i = 0; i < desc->fd_seg_cnt; i++) {
ceph_encode_32(&p, desc->fd_lens[i]);
ceph_encode_16(&p, desc->fd_aligns[i]);
}
/* * This would fire for FRAME_TAG_WAIT (it has one empty * segment), but we should never get it as client.
*/ if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
pr_err("last segment empty, segment count %d\n",
desc->fd_seg_cnt); return -EINVAL;
}
late_status = ceph_decode_8(&p); if ((late_status & FRAME_LATE_STATUS_ABORTED_MASK) !=
FRAME_LATE_STATUS_COMPLETE) { /* we should never get an aborted message as client */
pr_err("bad late_status 0x%x\n", late_status); return -EINVAL;
}
if (con_secret_len < CEPH_GCM_KEY_LEN + 2 * CEPH_GCM_IV_LEN) {
pr_err("con_secret too small %d\n", con_secret_len); return -EINVAL;
}
noio_flag = memalloc_noio_save();
con->v2.gcm_tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
memalloc_noio_restore(noio_flag); if (IS_ERR(con->v2.gcm_tfm)) {
ret = PTR_ERR(con->v2.gcm_tfm);
con->v2.gcm_tfm = NULL;
pr_err("failed to allocate gcm tfm context: %d\n", ret); return ret;
}
WARN_ON((unsignedlong)con_secret &
crypto_aead_alignmask(con->v2.gcm_tfm));
ret = crypto_aead_setkey(con->v2.gcm_tfm, con_secret, CEPH_GCM_KEY_LEN); if (ret) {
pr_err("failed to set gcm key: %d\n", ret); return ret;
}
WARN_ON(crypto_aead_ivsize(con->v2.gcm_tfm) != CEPH_GCM_IV_LEN);
ret = crypto_aead_setauthsize(con->v2.gcm_tfm, CEPH_GCM_TAG_LEN); if (ret) {
pr_err("failed to set gcm tag size: %d\n", ret); return ret;
}
con->v2.gcm_req = aead_request_alloc(con->v2.gcm_tfm, GFP_NOIO); if (!con->v2.gcm_req) {
pr_err("failed to allocate gcm request\n"); return -ENOMEM;
}
/** * init_sgs_pages: set up scatterlist on an array of page pointers * @sg: scatterlist to populate * @pages: pointer to page array * @dpos: position in the array to start (bytes) * @dlen: len to add to sg (bytes) * @pad: pointer to pad destination (if any) * * Populate the scatterlist from the page array, starting at an arbitrary * byte in the array and running for a specified length.
*/ staticvoid init_sgs_pages(struct scatterlist **sg, struct page **pages, int dpos, int dlen, u8 *pad)
{ int idx = dpos >> PAGE_SHIFT; int off = offset_in_page(dpos); int resid = dlen;
do { int len = min(resid, (int)PAGE_SIZE - off);
sg_set_page(*sg, pages[idx], len, off);
*sg = sg_next(*sg);
off = 0;
++idx;
resid -= len;
} while (resid);
/* Process sparse read data that lives in a buffer */ staticint process_v2_sparse_read(struct ceph_connection *con, struct page **pages, int spos)
{ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor; int ret;
for (;;) { char *buf = NULL;
ret = con->ops->sparse_read(con, cursor, &buf); if (ret <= 0) return ret;
do { int idx = spos >> PAGE_SHIFT; int soff = offset_in_page(spos); struct page *spage = con->v2.in_enc_pages[idx]; int len = min_t(int, ret, PAGE_SIZE - soff);
/* * base: * preamble * control body (ctrl_len bytes) * space for control crc * * extdata (optional): * control body (extdata_len bytes) * * Compute control crc and gather base and extdata into: * * preamble * control body (ctrl_len + extdata_len bytes) * control crc * * Preamble should already be encoded at the start of base.
*/ staticvoid prepare_head_plain(struct ceph_connection *con, void *base, int ctrl_len, void *extdata, int extdata_len, bool to_be_signed)
{ int base_len = CEPH_PREAMBLE_LEN + ctrl_len + CEPH_CRC_LEN; void *crcp = base + base_len - CEPH_CRC_LEN;
u32 crc;
/* * base: * preamble * control body (ctrl_len bytes) * space for padding, if needed * space for control remainder auth tag * space for preamble auth tag * * Encrypt preamble and the inline portion, then encrypt the remainder * and gather into: * * preamble * control body (48 bytes) * preamble auth tag * control body (ctrl_len - 48 bytes) * zero padding, if needed * control remainder auth tag * * Preamble should already be encoded at the start of base.
*/ staticint prepare_head_secure_big(struct ceph_connection *con, void *base, int ctrl_len)
{ int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN; void *rem = CTRL_BODY(base) + CEPH_PREAMBLE_INLINE_LEN; void *rem_tag = rem + padded_len(rem_len); void *pmbl_tag = rem_tag + CEPH_GCM_TAG_LEN; struct scatterlist sgs[2]; int ret;
sg_init_table(sgs, 2);
sg_set_buf(&sgs[0], base, rem - base);
sg_set_buf(&sgs[1], pmbl_tag, CEPH_GCM_TAG_LEN);
ret = gcm_crypt(con, true, sgs, sgs, rem - base); if (ret) return ret;
/* control remainder padding? */ if (need_padding(rem_len))
memset(rem + rem_len, 0, padding_len(rem_len));
sg_init_one(&sgs[0], rem, pmbl_tag - rem);
ret = gcm_crypt(con, true, sgs, sgs, rem_tag - rem); if (ret) return ret;
if (!front_len(msg) && !middle_len(msg)) { if (!data_len(msg)) { /* * Empty message: once the head is written, * we are done -- there is no epilogue.
*/
con->v2.out_state = OUT_S_FINISH_MESSAGE; return;
}
/* * Unfortunately the kernel crypto API doesn't support streaming * (piecewise) operation for AEAD algorithms, so we can't get away * with a fixed size buffer and a couple sgs. Instead, we have to * allocate pages for the entire tail of the message (currently up * to ~32M) and two sgs arrays (up to ~256K each)...
*/ staticint prepare_message_secure(struct ceph_connection *con)
{ void *zerop = page_address(ceph_zero_page); struct sg_table enc_sgt = {}; struct sg_table sgt = {}; struct page **enc_pages; int enc_page_cnt; int tail_len; int ret;
ret = prepare_head_secure_small(con, con->v2.out_buf, sizeof(struct ceph_msg_header2)); if (ret) return ret;
tail_len = tail_onwire_len(con->out_msg, true); if (!tail_len) { /* * Empty message: once the head is written, * we are done -- there is no epilogue.
*/
con->v2.out_state = OUT_S_FINISH_MESSAGE; return 0;
}
encode_epilogue_secure(con, false);
ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
&con->v2.out_epil, NULL, 0, false); if (ret) goto out;
enc_page_cnt = calc_pages_for(0, tail_len);
enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO); if (IS_ERR(enc_pages)) {
ret = PTR_ERR(enc_pages); goto out;
}
if (con->v2.in_enc_resid) {
con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE; return;
}
/* * We are set to read the last piece of ciphertext (ending * with epilogue) + auth tag.
*/
WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
staticint prepare_read_tail_secure(struct ceph_connection *con)
{ struct page **enc_pages; int enc_page_cnt; int tail_len;
if (req_feat & ~server_feat) {
pr_err("msgr2 feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
server_feat, req_feat & ~server_feat);
con->error_msg = "missing required protocol features"; return -EINVAL;
} if (server_req_feat & ~feat) {
pr_err("msgr2 feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n",
feat, server_req_feat & ~feat);
con->error_msg = "missing required protocol features"; return -EINVAL;
}
/* no reset_out_kvecs() as our banner may still be pending */
ret = prepare_hello(con); if (ret) {
pr_err("prepare_hello failed: %d\n", ret); return ret;
}
/* * Set our address to the address our first peer (i.e. monitor) * sees that we are connecting from. If we are behind some sort * of NAT and want to be identified by some private (not NATed) * address, ip option should be used.
*/ if (ceph_addr_is_blank(my_addr)) {
memcpy(&my_addr->in_addr, &addr_for_me.in_addr, sizeof(my_addr->in_addr));
ceph_addr_set_port(my_addr, 0);
dout("%s con %p set my addr %s, as seen by peer %s\n",
__func__, con, ceph_pr_addr(my_addr),
ceph_pr_addr(&con->peer_addr));
} else {
dout("%s con %p my addr already set %s\n",
__func__, con, ceph_pr_addr(my_addr));
}
/* no reset_out_kvecs() as our hello may still be pending */
ret = prepare_auth_request(con); if (ret) { if (ret != -EAGAIN)
pr_err("prepare_auth_request failed: %d\n", ret); return ret;
}
con->state = CEPH_CON_S_V2_AUTH; return 0;
bad:
pr_err("failed to decode hello\n"); return -EINVAL;
}
/*
 * Decode an auth_bad_method payload from [p, end) and hand it to the
 * handle_auth_bad_method() connection op.
 *
 * Payload layout as decoded here: used_proto, result, a counted list
 * of allowed protocols and a counted list of allowed modes, all 32-bit
 * values.  Each list is limited to the size of the on-stack array
 * that receives it.
 *
 * con->mutex is dropped around the callback and re-taken afterwards;
 * the connection state is rechecked once the mutex is held again.
 *
 * Return:
 *   -EINVAL - payload is truncated or a list is too long
 *   -EAGAIN - connection left CEPH_CON_S_V2_AUTH while unlocked
 *   otherwise the value returned by handle_auth_bad_method()
 */
static int process_auth_bad_method(struct ceph_connection *con,
				   void *p, void *end)
{
	int allowed_protos[8], allowed_modes[8];
	int allowed_proto_cnt, allowed_mode_cnt;
	int used_proto, result;
	int ret;
	int i;

	ceph_decode_32_safe(&p, end, used_proto, bad);
	ceph_decode_32_safe(&p, end, result, bad);
	dout("%s con %p used_proto %d result %d\n", __func__, con, used_proto,
	     result);

	/*
	 * The counts are signed but ARRAY_SIZE() is unsigned, so a
	 * negative count converts to a huge value and is rejected too.
	 */
	ceph_decode_32_safe(&p, end, allowed_proto_cnt, bad);
	if (allowed_proto_cnt > ARRAY_SIZE(allowed_protos)) {
		pr_err("allowed_protos too big %d\n", allowed_proto_cnt);
		return -EINVAL;
	}
	for (i = 0; i < allowed_proto_cnt; i++) {
		ceph_decode_32_safe(&p, end, allowed_protos[i], bad);
		dout("%s con %p allowed_protos[%d] %d\n", __func__, con,
		     i, allowed_protos[i]);
	}

	ceph_decode_32_safe(&p, end, allowed_mode_cnt, bad);
	if (allowed_mode_cnt > ARRAY_SIZE(allowed_modes)) {
		pr_err("allowed_modes too big %d\n", allowed_mode_cnt);
		return -EINVAL;
	}
	for (i = 0; i < allowed_mode_cnt; i++) {
		ceph_decode_32_safe(&p, end, allowed_modes[i], bad);
		dout("%s con %p allowed_modes[%d] %d\n", __func__, con,
		     i, allowed_modes[i]);
	}

	mutex_unlock(&con->mutex);
	ret = con->ops->handle_auth_bad_method(con, used_proto, result,
					       allowed_protos,
					       allowed_proto_cnt,
					       allowed_modes,
					       allowed_mode_cnt);
	mutex_lock(&con->mutex);
	/* the state may have changed while the mutex was not held */
	if (con->state != CEPH_CON_S_V2_AUTH) {
		dout("%s con %p state changed to %d\n", __func__, con,
		     con->state);
		return -EAGAIN;
	}

	dout("%s con %p handle_auth_bad_method ret %d\n", __func__, con, ret);
	return ret;

bad:
	pr_err("failed to decode auth_bad_method\n");
	return -EINVAL;
}
staticint process_auth_reply_more(struct ceph_connection *con, void *p, void *end)
{ int payload_len; int ret;
dout("%s con %p payload_len %d\n", __func__, con, payload_len);
reset_out_kvecs(con);
ret = prepare_auth_request_more(con, p, payload_len); if (ret) { if (ret != -EAGAIN)
pr_err("prepare_auth_request_more failed: %d\n", ret); return ret;
}
return 0;
bad:
pr_err("failed to decode auth_reply_more\n"); return -EINVAL;
}
/* * Align session_key and con_secret to avoid GFP_ATOMIC allocation * inside crypto_shash_setkey() and crypto_aead_setkey() called from * setup_crypto(). __aligned(16) isn't guaranteed to work for stack * objects, so do it by hand.
*/ staticint process_auth_done(struct ceph_connection *con, void *p, void *end)
{
u8 session_key_buf[CEPH_KEY_LEN + 16];
u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16];
u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16);
u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16); int session_key_len, con_secret_len; int payload_len;
u64 global_id; int ret;
ret = ceph_hmac_sha256(con, con->v2.out_sign_kvecs,
con->v2.out_sign_kvec_cnt, hmac); if (ret) return ret;
ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad); if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) {
con->error_msg = "integrity error, bad auth signature"; return -EBADMSG;
}
dout("%s con %p auth signature ok\n", __func__, con);
/* no reset_out_kvecs() as our auth_signature may still be pending */ if (!con->v2.server_cookie) {
ret = prepare_client_ident(con); if (ret) {
pr_err("prepare_client_ident failed: %d\n", ret); return ret;
}
con->state = CEPH_CON_S_V2_SESSION_CONNECT;
} else {
ret = prepare_session_reconnect(con); if (ret) {
pr_err("prepare_session_reconnect failed: %d\n", ret); return ret;
}
con->state = CEPH_CON_S_V2_SESSION_RECONNECT;
}
return 0;
bad:
pr_err("failed to decode auth_signature\n"); return -EINVAL;
}
/* is this who we intended to talk to? */ if (memcmp(&addr, &con->peer_addr, sizeof(con->peer_addr))) {
pr_err("bad peer addr/nonce, want %s/%u, got %s/%u\n",
ceph_pr_addr(&con->peer_addr),
le32_to_cpu(con->peer_addr.nonce),
ceph_pr_addr(&addr), le32_to_cpu(addr.nonce));
con->error_msg = "wrong peer at address"; return -EINVAL;
}
if (client->required_features & ~features) {
pr_err("RADOS feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n",
features, client->required_features & ~features);
con->error_msg = "missing required protocol features"; return -EINVAL;
}
/* * Both name->type and name->num are set in ceph_con_open() but * name->num may be bogus in the initial monmap. name->type is * verified in handle_hello().
*/
WARN_ON(!con->peer_name.type);
con->peer_name.num = cpu_to_le64(global_id);
con->v2.peer_global_seq = global_seq;
con->peer_features = features;
WARN_ON(required_features & ~client->supported_features);
con->v2.server_cookie = cookie;
mutex_unlock(&con->mutex); if (con->ops->peer_reset)
con->ops->peer_reset(con);
mutex_lock(&con->mutex); if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) {
dout("%s con %p state changed to %d\n", __func__, con,
con->state); return -EAGAIN;
}
free_conn_bufs(con);
reset_out_kvecs(con);
ret = prepare_client_ident(con); if (ret) {
pr_err("prepare_client_ident (rst) failed: %d\n", ret); return ret;
}
bad:
pr_err("failed to decode ack\n"); return -EINVAL;
}
staticint process_control(struct ceph_connection *con, void *p, void *end)
{ int tag = con->v2.in_desc.fd_tag; int ret;
dout("%s con %p tag %d len %d\n", __func__, con, tag, (int)(end - p));
switch (tag) { case FRAME_TAG_HELLO:
ret = process_hello(con, p, end); break; case FRAME_TAG_AUTH_BAD_METHOD:
ret = process_auth_bad_method(con, p, end); break; case FRAME_TAG_AUTH_REPLY_MORE:
ret = process_auth_reply_more(con, p, end); break; case FRAME_TAG_AUTH_DONE:
ret = process_auth_done(con, p, end); break; case FRAME_TAG_AUTH_SIGNATURE:
ret = process_auth_signature(con, p, end); break; case FRAME_TAG_SERVER_IDENT:
ret = process_server_ident(con, p, end); break; case FRAME_TAG_IDENT_MISSING_FEATURES:
ret = process_ident_missing_features(con, p, end); break; case FRAME_TAG_SESSION_RECONNECT_OK:
ret = process_session_reconnect_ok(con, p, end); break; case FRAME_TAG_SESSION_RETRY:
ret = process_session_retry(con, p, end); break; case FRAME_TAG_SESSION_RETRY_GLOBAL:
ret = process_session_retry_global(con, p, end); break; case FRAME_TAG_SESSION_RESET:
ret = process_session_reset(con, p, end); break; case FRAME_TAG_KEEPALIVE2_ACK:
ret = process_keepalive2_ack(con, p, end); break; case FRAME_TAG_ACK:
ret = process_ack(con, p, end); break; default:
pr_err("bad tag %d\n", tag);
con->error_msg = "protocol error, bad tag"; return -EINVAL;
} if (ret) {
dout("%s con %p error %d\n", __func__, con, ret); return ret;
}
/* * We could have been closed by ceph_con_close() because * ceph_con_process_message() temporarily drops con->mutex.
*/ if (con->state != CEPH_CON_S_OPEN) {
dout("%s con %p state changed to %d\n", __func__, con,
con->state); return -EAGAIN;
}
prepare_read_preamble(con); return 0;
}
staticint __handle_control(struct ceph_connection *con, void *p)
{ void *end = p + con->v2.in_desc.fd_lens[0]; struct ceph_msg *msg; int ret;
if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE) return process_control(con, p, end);
ret = process_message_header(con, p, end); if (ret < 0) return ret; if (ret == 0) {
prepare_skip_message(con); return 0;
}
if (con_secure(con)) {
ret = decrypt_preamble(con); if (ret) { if (ret == -EBADMSG)
con->error_msg = "integrity error, bad preamble auth tag"; return ret;
}
}
ret = decode_preamble(con->v2.in_buf, desc); if (ret) { if (ret == -EBADMSG)
con->error_msg = "integrity error, bad crc"; else
con->error_msg = "protocol error, bad preamble"; return ret;
}
dout("%s con %p tag %d seg_cnt %d %d+%d+%d+%d\n", __func__,
con, desc->fd_tag, desc->fd_seg_cnt, desc->fd_lens[0],
desc->fd_lens[1], desc->fd_lens[2], desc->fd_lens[3]);
if (!con_secure(con)) return prepare_read_control(con);
if (desc->fd_lens[0] > CEPH_PREAMBLE_INLINE_LEN) return prepare_read_control_remainder(con);
staticint handle_control_remainder(struct ceph_connection *con)
{ int ret;
WARN_ON(!con_secure(con));
ret = decrypt_control_remainder(con); if (ret) { if (ret == -EBADMSG)
con->error_msg = "integrity error, bad control remainder auth tag"; return ret;
}
if (con_secure(con)) {
ret = decrypt_tail(con); if (ret) { if (ret == -EBADMSG)
con->error_msg = "integrity error, bad epilogue auth tag"; return ret;
}
/* just late_status */
ret = decode_epilogue(con->v2.in_buf, NULL, NULL, NULL); if (ret) {
con->error_msg = "protocol error, bad epilogue"; return ret;
}
} else {
ret = decode_epilogue(con->v2.in_buf, &front_crc,
&middle_crc, &data_crc); if (ret) {
con->error_msg = "protocol error, bad epilogue"; return ret;
}
ret = verify_epilogue_crcs(con, front_crc, middle_crc,
data_crc); if (ret) {
con->error_msg = "integrity error, bad crc"; return ret;
}
}
return process_message(con);
}
/*
 * Finish skipping incoming data.
 *
 * On a secure connection the incoming GCM nonce (con->v2.in_gcm_nonce)
 * is incremented first; the rest is done by __finish_skip().
 */
static void finish_skip(struct ceph_connection *con)
{
	dout("%s con %p\n", __func__, con);

	if (con_secure(con))
		gcm_inc_nonce(&con->v2.in_gcm_nonce);

	__finish_skip(con);
}
staticint populate_in_iter(struct ceph_connection *con)
{ int ret;
dout("%s con %p state %d in_state %d\n", __func__, con, con->state,
con->v2.in_state);
WARN_ON(iov_iter_count(&con->v2.in_iter));
if (con->state == CEPH_CON_S_V2_BANNER_PREFIX) {
ret = process_banner_prefix(con);
} elseif (con->state == CEPH_CON_S_V2_BANNER_PAYLOAD) {
ret = process_banner_payload(con);
} elseif ((con->state >= CEPH_CON_S_V2_HELLO &&
con->state <= CEPH_CON_S_V2_SESSION_RECONNECT) ||
con->state == CEPH_CON_S_OPEN) { switch (con->v2.in_state) { case IN_S_HANDLE_PREAMBLE:
ret = handle_preamble(con); break; case IN_S_HANDLE_CONTROL:
ret = handle_control(con); break; case IN_S_HANDLE_CONTROL_REMAINDER:
ret = handle_control_remainder(con); break; case IN_S_PREPARE_READ_DATA:
ret = prepare_read_data(con); break; case IN_S_PREPARE_READ_DATA_CONT:
prepare_read_data_cont(con);
ret = 0; break; case IN_S_PREPARE_READ_ENC_PAGE:
prepare_read_enc_page(con);
ret = 0; break; case IN_S_PREPARE_SPARSE_DATA:
ret = prepare_sparse_read_data(con); break; case IN_S_PREPARE_SPARSE_DATA_CONT:
ret = prepare_sparse_read_cont(con); break; case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con); break; case IN_S_FINISH_SKIP:
finish_skip(con);
ret = 0; break; default:
WARN(1, "bad in_state %d", con->v2.in_state); return -EINVAL;
}
} else {
WARN(1, "bad state %d", con->state); return -EINVAL;
} if (ret) {
dout("%s con %p error %d\n", __func__, con, ret); return ret;
}
if (WARN_ON(!iov_iter_count(&con->v2.in_iter))) return -ENODATA;
dout("%s con %p populated %zu\n", __func__, con,
iov_iter_count(&con->v2.in_iter)); return 1;
}
int ceph_con_v2_try_read(struct ceph_connection *con)
{ int ret;
dout("%s con %p state %d need %zu\n", __func__, con, con->state,
iov_iter_count(&con->v2.in_iter));
if (con->state == CEPH_CON_S_PREOPEN) return 0;
/* * We should always have something pending here. If not, * avoid calling populate_in_iter() as if we read something * (ceph_tcp_recv() would immediately return 1).
*/ if (WARN_ON(!iov_iter_count(&con->v2.in_iter))) return -ENODATA;
for (;;) {
ret = ceph_tcp_recv(con); if (ret <= 0) return ret;
ret = populate_in_iter(con); if (ret <= 0) { if (ret && ret != -EAGAIN && !con->error_msg)
con->error_msg = "read processing error"; return ret;
}
}
}
/* * We've written all data. Queue epilogue. Once it's written, * we are done.
*/
reset_out_kvecs(con);
prepare_epilogue_plain(con, false);
con->v2.out_state = OUT_S_FINISH_MESSAGE;
}
if (con->v2.out_enc_resid) {
WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE); return;
}
/* * We've queued the last piece of ciphertext (ending with * epilogue) + auth tag. Once it's written, we are done.
*/
WARN_ON(con->v2.out_enc_i != con->v2.out_enc_page_cnt);
con->v2.out_state = OUT_S_FINISH_MESSAGE;
}
/* * We've zero-filled everything up to epilogue. Queue epilogue * with late_status set to ABORTED and crcs adjusted for zeros. * Once it's written, we are done patching up for the revoke.
*/
reset_out_kvecs(con);
prepare_epilogue_plain(con, true);
con->v2.out_state = OUT_S_FINISH_MESSAGE;
}
/* we end up here both plain and secure modes */ if (con->v2.out_enc_pages) {
WARN_ON(!con->v2.out_enc_page_cnt);
ceph_release_page_vector(con->v2.out_enc_pages,
con->v2.out_enc_page_cnt);
con->v2.out_enc_pages = NULL;
con->v2.out_enc_page_cnt = 0;
} /* message may have been revoked */ if (con->out_msg) {
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
con->v2.out_state = OUT_S_GET_NEXT;
}
staticint populate_out_iter(struct ceph_connection *con)
{ int ret;
dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
con->v2.out_state);
WARN_ON(iov_iter_count(&con->v2.out_iter));
int ceph_con_v2_try_write(struct ceph_connection *con)
{ int ret;
dout("%s con %p state %d have %zu\n", __func__, con, con->state,
iov_iter_count(&con->v2.out_iter));
/* open the socket first? */ if (con->state == CEPH_CON_S_PREOPEN) {
WARN_ON(con->peer_addr.type != CEPH_ENTITY_ADDR_TYPE_MSGR2);
/* * Always bump global_seq. Bump connect_seq only if * there is a session (i.e. we are reconnecting and will * send session_reconnect instead of client_ident).
*/
con->v2.global_seq = ceph_get_global_seq(con->msgr, 0); if (con->v2.server_cookie)
con->v2.connect_seq++;
ret = prepare_read_banner_prefix(con); if (ret) {
pr_err("prepare_read_banner_prefix failed: %d\n", ret);
con->error_msg = "connect error"; return ret;
}
reset_out_kvecs(con);
ret = prepare_banner(con); if (ret) {
pr_err("prepare_banner failed: %d\n", ret);
con->error_msg = "connect error"; return ret;
}
ret = ceph_tcp_connect(con); if (ret) {
pr_err("ceph_tcp_connect failed: %d\n", ret);
con->error_msg = "connect error"; return ret;
}
}
if (!iov_iter_count(&con->v2.out_iter)) {
ret = populate_out_iter(con); if (ret <= 0) { if (ret && ret != -EAGAIN && !con->error_msg)
con->error_msg = "write processing error"; return ret;
}
}
tcp_sock_set_cork(con->sock->sk, true); for (;;) {
ret = ceph_tcp_send(con); if (ret <= 0) break;
ret = populate_out_iter(con); if (ret <= 0) { if (ret && ret != -EAGAIN && !con->error_msg)
con->error_msg = "write processing error"; break;
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.