/* Cfr RFC 8684 Section 3.3.0: * If a checksum is present but its use had * not been negotiated in the MP_CAPABLE handshake, the receiver MUST * close the subflow with a RST, as it is not behaving as negotiated. * If a checksum is not present when its use has been negotiated, the * receiver MUST close the subflow with a RST, as it is considered * broken * We parse even option with mismatching csum presence, so that * later in subflow_data_ready we can trigger the reset.
*/ if (opsize != expected_opsize &&
(expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM)) break;
/* try to be gentle vs future versions on the initial syn */
version = *ptr++ & MPTCP_VERSION_MASK; if (opsize != TCPOLEN_MPTCP_MPC_SYN) { if (version != MPTCP_SUPPORTED_VERSION) break;
} elseif (version < MPTCP_SUPPORTED_VERSION) { break;
}
flags = *ptr++; if (!mptcp_cap_flag_sha256(flags) ||
(flags & MPTCP_CAP_EXTENSIBILITY)) break;
/* RFC 6824, Section 3.1: * "For the Checksum Required bit (labeled "A"), if either * host requires the use of checksums, checksums MUST be used. * In other words, the only way for checksums not to be used * is if both hosts in their SYNs set A=0."
*/ if (flags & MPTCP_CAP_CHECKSUM_REQD)
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
/* Ensure that casting the whole status to u32 is efficient and safe */
BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32));
BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status), sizeof(u32)));
*(u32 *)&mp_opt->status = 0;
/* When skb is not available, we better over-estimate the emitted * options len. A full DSS option (28 bytes) is longer than * TCPOLEN_MPTCP_MPC_ACK_DATA(22) or TCPOLEN_MPTCP_MPJ_ACK(24), so * tell the caller to defer the estimate to * mptcp_established_options_dss(), which will reserve enough space.
*/ if (!skb) returnfalse;
/* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */ if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable ||
subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
sk->sk_state != TCP_ESTABLISHED) returnfalse;
/* we will check ops->data_len in mptcp_write_options() to * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and * TCPOLEN_MPTCP_MPC_ACK
*/
opts->data_len = data_len;
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
/* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data
*/ if (data_len > 0) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) { /* we need to propagate more info to csum the pseudo hdr */
opts->data_seq = mpext->data_seq;
opts->subflow_seq = mpext->subflow_seq;
opts->csum = mpext->csum;
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*size = ALIGN(len, 4);
} else {
*size = TCPOLEN_MPTCP_MPC_ACK;
}
/* we can use the full delegate action helper only from BH context * If we are in process context - sk is flushing the backlog at * socket lock release time - just set the appropriate flag, will * be handled by the release callback
*/ if (sock_owned_by_user(sk))
set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status); else
mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK); returntrue;
} returnfalse;
}
/* Fold DATA_FIN signalling into the DSS extension that will be emitted
 * for @skb on the given subflow.
 *
 * Two cases per RFC 8684:
 *  - no data mapping present (or zero-length skb): build the mandated
 *    zero-payload DATA_FIN mapping (data_len == 1, subflow_seq == 0);
 *  - an existing mapping is the final one (it ends exactly at the
 *    DATA_FIN sequence): extend it by the one byte DATA_FIN consumes.
 * Any other mapping is left untouched; DATA_FIN will be carried later.
 */
static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
				 struct sk_buff *skb, struct mptcp_ext *ext)
{
	/* The write_seq value has already been incremented, so the actual
	 * sequence number for the DATA_FIN is one less.
	 */
	u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1;

	if (!ext->use_map || !skb->len) {
		/* RFC6824 requires a DSS mapping with specific values
		 * if DATA_FIN is set but no data payload is mapped
		 */
		ext->data_fin = 1;
		ext->use_map = 1;
		ext->dsn64 = 1;
		ext->data_seq = data_fin_tx_seq;
		ext->subflow_seq = 0;
		ext->data_len = 1;
	} else if (ext->data_seq + ext->data_len == data_fin_tx_seq) {
		/* If there's an existing DSS mapping and it is the
		 * final mapping, DATA_FIN consumes 1 additional byte of
		 * mapping space.
		 */
		ext->data_fin = 1;
		ext->data_len++;
	}
}
if (mpext) { if (opts->csum_reqd)
map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
opts->ext_copy = *mpext;
}
dss_size = map_size; if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
opts->suboptions = OPTION_MPTCP_DSS;
ret = true;
}
/* passive sockets msk will set the 'can_ack' after accept(), even * if the first subflow may have the already the remote key handy
*/
opts->ext_copy.use_ack = 0; if (!READ_ONCE(msk->can_ack)) {
*size = ALIGN(dss_size, 4); return ret;
}
/* add addr will strip the existing options, be sure to avoid breaking * MPC/MPJ handshakes
*/ if (!mptcp_pm_should_add_signal(msk) ||
(opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
!mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr,
&echo, &drop_other_suboptions)) returnfalse;
/* * Later on, mptcp_write_options() will enforce mutually exclusion with * DSS, bail out if such option is set and we can't drop it.
*/ if (drop_other_suboptions)
remaining += opt_size; elseif (opts->suboptions & OPTION_MPTCP_DSS) returnfalse;
len = mptcp_add_addr_len(addr.family, echo, !!addr.port); if (remaining < len) returnfalse;
*size = len; if (drop_other_suboptions) {
pr_debug("drop other suboptions\n");
opts->suboptions = 0;
/* note that e.g. DSS could have written into the memory * aliased by ahmac, we must reset the field here * to avoid appending the hmac even for ADD_ADDR echo * options
*/
opts->ahmac = 0;
*size -= opt_size;
}
opts->addr = addr;
opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key),
READ_ONCE(msk->remote_key),
&opts->addr);
} else {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
}
pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n",
opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
for (i = 0; i < opts->rm_list.nr; i++)
pr_debug("rm_list_ids[%d]=%d\n", i, opts->rm_list.ids[i]);
MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); returntrue;
}
/* can't send MP_PRIO with MPC, as they share the same option space: * 'backup'. Also it makes no sense at all
*/ if (!subflow->send_mp_prio || (opts->suboptions & OPTIONS_MPTCP_MPC)) returnfalse;
/* account for the trailing 'nop' option */ if (remaining < TCPOLEN_MPTCP_PRIO_ALIGN) returnfalse;
if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb))) returnfalse;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) ||
mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
} /* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
} returntrue;
}
/* we reserved enough space for the above options, and exceeding the * TCP option space would be fatal
*/ if (WARN_ON_ONCE(opt_size > remaining)) returnfalse;
staticbool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_subflow_context *subflow, struct sk_buff *skb, struct mptcp_options_received *mp_opt)
{ /* here we can process OoO, in-window pkts, only in-sequence 4th ack * will make the subflow fully established
*/ if (likely(READ_ONCE(subflow->fully_established))) { /* on passive sockets, check for 3rd ack retransmission * note that msk is always set by subflow_syn_recv_sock() * for mp_join subflows
*/ if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
!subflow->request_join)
tcp_send_ack(ssk); goto check_notify;
}
/* we must process OoO packets before the first subflow is fully * established. OoO packets are instead a protocol violation * for MP_JOIN subflows as the peer must not send any data * before receiving the forth ack - cfr. RFC 8684 section 3.2.
*/ if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) { if (subflow->mp_join) goto reset; if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK) goto set_fully_established; return subflow->mp_capable;
}
if (subflow->remote_key_valid &&
(((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) &&
(!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR.
*/ goto set_fully_established;
}
/* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP. Fallback scenarios requires a reset for * MP_JOIN subflows.
*/ if (!(mp_opt->suboptions & OPTIONS_MPTCP_MPC)) { if (subflow->mp_join) goto reset;
subflow->mp_capable = 0; if (!mptcp_try_fallback(ssk, MPTCP_MIB_MPCAPABLEDATAFALLBACK)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_FALLBACKFAILED); goto reset;
} returnfalse;
}
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
set_fully_established: if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
check_notify: /* if the subflow is not already linked into the conn_list, we can't * notify the PM: this subflow is still on the listener queue * and the PM possibly acquiring the subflow lock could race with * the listener close
*/ if (likely(subflow->pm_notified) || list_empty(&subflow->node)) returntrue;
/* avoid ack expansion on update conflict, to reduce the risk of * wrongly expanding to a future ack sequence number, which is way * more dangerous than missing an ack
*/
old_snd_una = msk->snd_una;
new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
/* ACK for data not even sent yet? Ignore.*/ if (unlikely(after64(new_snd_una, snd_nxt)))
new_snd_una = old_snd_una;
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
if (after64(new_wnd_end, msk->wnd_end))
WRITE_ONCE(msk->wnd_end, new_wnd_end);
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ if (after64(msk->wnd_end, snd_nxt))
__mptcp_check_push(sk, ssk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
{ /* Skip if DATA_FIN was already received. * If updating simultaneously with the recvmsg loop, values * should match. If they mismatch, the peer is misbehaving and * we will prefer the most recent information.
*/ if (READ_ONCE(msk->rcv_data_fin)) returnfalse;
/* Return false in case of error (or subflow has been reset), * else return true.
*/ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
{ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_options_received mp_opt; struct mptcp_ext *mpext;
if (__mptcp_check_fallback(msk)) { /* Keep it simple and unconditionally trigger send data cleanup and * pending queue spooling. We will need to acquire the data lock * for more accurate checks, and once the lock is acquired, such * helpers are cheap.
*/
mptcp_data_lock(subflow->conn); if (sk_stream_memory_free(sk))
__mptcp_check_push(subflow->conn, sk);
/* on fallback we just need to ignore the msk-level snd_una, as * this is really plain TCP
*/
__mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt));
/* The subflow can be in close state only if check_fully_established() * just sent a reset. If so, tell the caller to ignore the current packet.
*/ if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return sk->sk_state != TCP_CLOSE;
if (!(mp_opt.suboptions & OPTION_MPTCP_DSS)) returntrue;
}
/* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck
*/ if (mp_opt.use_ack)
ack_update_msk(msk, sk, &mp_opt);
/* Zero-data-length packets are dropped by the caller and not * propagated to the MPTCP layer, so the skb extension does not * need to be allocated or populated. DATA_FIN information, if * present, needs to be updated here before the skb is freed.
*/ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 &&
mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64))
mptcp_schedule_work((struct sock *)msk);
returntrue;
}
mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) returnfalse;
memset(mpext, 0, sizeof(*mpext));
if (likely(mp_opt.use_map)) { if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data
*/
mptcp_crypto_key_sha(subflow->remote_key, NULL,
&mpext->data_seq);
mpext->data_seq++;
mpext->subflow_seq = 1;
mpext->dsn64 = 1;
mpext->mpc_map = 1;
mpext->data_fin = 0;
} else {
mpext->data_seq = mp_opt.data_seq;
mpext->subflow_seq = mp_opt.subflow_seq;
mpext->dsn64 = mp_opt.dsn64;
mpext->data_fin = mp_opt.data_fin;
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
mpext->csum_reqd = !!(mp_opt.suboptions & OPTION_MPTCP_CSUMREQD);
if (mpext->csum_reqd)
mpext->csum = mp_opt.csum;
}
/* Make sure we do not exceed the maximum possible * scaled window.
*/ if (unlikely(th->syn))
new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale; if (!tp->rx_opt.rcv_wscale &&
READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW); else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
/* cfr RFC 8684 3.3.1.: * the data sequence number used in the pseudo-header is * always the 64-bit value, irrespective of what length is used in the * DSS option itself.
*/
header.data_seq = cpu_to_be64(data_seq);
header.subflow_seq = htonl(subflow_seq);
header.data_len = htons(data_len);
header.csum = 0;
/* Which options can be used together? * * X: mutually exclusive * O: often used together * C: can be used together in some cases * P: could be used together but we prefer not to (optimisations) * * Opt: | MPC | MPJ | DSS | ADD | RM | PRIO | FAIL | FC | * ------|------|------|------|------|------|------|------|------| * MPC |------|------|------|------|------|------|------|------| * MPJ | X |------|------|------|------|------|------|------| * DSS | X | X |------|------|------|------|------|------| * ADD | X | X | P |------|------|------|------|------| * RM | C | C | C | P |------|------|------|------| * PRIO | X | C | C | C | C |------|------|------| * FAIL | X | X | C | X | X | X |------|------| * FC | X | X | X | X | X | X | X |------| * RST | X | X | X | X | X | X | O | O | * ------|------|------|------|------|------|------|------|------| * * The same applies in mptcp_established_options() function.
*/ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) { struct mptcp_ext *mpext = &opts->ext_copy;
u8 len = TCPOLEN_MPTCP_DSS_BASE;
u8 flags = 0;
if (mpext->use_ack) {
flags = MPTCP_DSS_HAS_ACK; if (mpext->ack64) {
len += TCPOLEN_MPTCP_DSS_ACK64;
flags |= MPTCP_DSS_ACK64;
} else {
len += TCPOLEN_MPTCP_DSS_ACK32;
}
}
if (mpext->use_map) {
len += TCPOLEN_MPTCP_DSS_MAP64;
/* Use only 64-bit mapping flags for now, add * support for optional 32-bit mappings later.
*/
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN;
if (opts->csum_reqd)
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
if (mpext->use_map) {
put_unaligned_be64(mpext->data_seq, ptr);
ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1; if (opts->csum_reqd) { /* data_len == 0 is reserved for the infinite mapping, * the checksum will also be set to 0.
*/
put_len_csum(mpext->data_len,
(mpext->data_len ? mptcp_make_csum(mpext) : 0),
ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
ptr += 1;
}
/* We might need to add MP_FAIL options in rare cases */ if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) goto mp_fail;
} elseif (OPTIONS_MPTCP_MPC & opts->suboptions) {
u8 len, flag = MPTCP_CAP_HMAC_SHA256;
if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYN;
} elseif (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK;
} elseif (opts->data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd)
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
} else {
len = TCPOLEN_MPTCP_MPC_ACK;
}
if (opts->csum_reqd)
flag |= MPTCP_CAP_CHECKSUM_REQD;
if (!opts->allow_join_id0)
flag |= MPTCP_CAP_DENY_JOIN_ID0;
/* NOTE(review): the lines below are a non-code extraction artifact — a
 * German web-page disclaimer accidentally appended to this source file.
 * It is preserved here inside a comment (mis-encoded '¤' markers and the
 * garbled "zugesichert.0.13Bemerkung:" fusion repaired) so the file
 * remains parseable; it should be removed entirely once confirmed junk.
 *
 * "Die Informationen auf dieser Webseite wurden nach bestem Wissen
 *  sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit,
 *  noch Richtigkeit, noch Qualität der bereitgestellten Informationen
 *  zugesichert.
 *  Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 *  experimentell."
 */