/* Copyright (c) 2018, Mellanox Technologies All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/* Safe, because this is the destroy flow, refcount is 0, so * tls_device_down can't store this field in parallel.
*/
netdev = rcu_dereference_protected(ctx->netdev,
!refcount_read(&ctx->refcount));
spin_lock_irqsave(&tls_device_lock, flags); if (unlikely(!refcount_dec_and_test(&ctx->refcount))) {
spin_unlock_irqrestore(&tls_device_lock, flags); return;
}
list_del(&ctx->list); /* Remove from tls_device_list / tls_device_down_list */
/* Safe, because this is the destroy flow, refcount is 0, so * tls_device_down can't store this field in parallel.
*/
netdev = rcu_dereference_protected(ctx->netdev,
!refcount_read(&ctx->refcount));
/* queue_work inside the spinlock * to make sure tls_device_down waits for that work.
*/
queue_work(destruct_wq, &offload_ctx->destruct_work);
}
spin_unlock_irqrestore(&tls_device_lock, flags);
if (!async_cleanup)
tls_device_free_ctx(ctx);
}
/* We assume that the socket is already connected */ staticstruct net_device *get_netdev_for_sock(struct sock *sk)
{ struct net_device *dev, *lowest_dev = NULL; struct dst_entry *dst;
/* At this point, there should be no references on this * socket and no in-flight SKBs associated with this * socket, so it is safe to free all the resources.
*/ void tls_device_sk_destruct(struct sock *sk)
{ struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
tls_ctx->sk_destruct(sk);
if (tls_ctx->tx_conf == TLS_HW) { if (ctx->open_record)
destroy_record(ctx->open_record);
delete_all_records(ctx);
crypto_free_aead(ctx->aead_send);
clean_acked_data_disable(tcp_sk(sk));
}
/* append tag * device will fill in the tag, we just need to append a placeholder * use socket memory to improve coalescing (re-using a single buffer * increases frag count) * if we can't allocate memory now use the dummy page
*/ if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
!skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
dummy_tag_frag.page = dummy_page;
dummy_tag_frag.offset = 0;
pfrag = &dummy_tag_frag;
}
tls_append_frag(record, pfrag, prot->tag_size);
/* TLS_HEADER_SIZE is not counted as part of the TLS record, and * we need to leave room for an authentication tag.
*/
max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
prot->prepend_size; do {
rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size); if (unlikely(rc)) {
rc = sk_stream_wait_memory(sk, &timeo); if (!rc) continue;
record = ctx->open_record; if (!record) break;
handle_error: if (record_type != TLS_RECORD_TYPE_DATA) { /* avoid sending partial * record with type != * application_data
*/
size = orig_size;
destroy_record(record);
ctx->open_record = NULL;
} elseif (record->len > prot->prepend_size) { goto last_record;
}
info = context->retransmit_hint; if (!info ||
before(seq, info->end_seq - info->len)) { /* if retransmit_hint is irrelevant start * from the beginning of the list
*/
info = list_first_entry_or_null(&context->records_list, struct tls_record_info, list); if (!info) return NULL; /* send the start_marker record if seq number is before the * tls offload start marker sequence number. This record is * required to handle TCP packets which are before TLS offload * started. * And if it's not start marker, look if this seq number * belongs to the list.
*/ if (likely(!tls_record_is_start_marker(info))) { /* we have the first record, get the last record to see * if this seq number belongs to the list.
*/
last = list_last_entry(&context->records_list, struct tls_record_info, list);
/* We just need the _rcu for the READ_ONCE() */
rcu_read_lock();
list_for_each_entry_from_rcu(info, &context->records_list, list) { if (before(seq, info->end_seq)) { if (!context->retransmit_hint ||
after(info->end_seq,
context->retransmit_hint->end_seq)) {
context->hint_record_sn = record_sn;
context->retransmit_hint = info;
}
*p_record_sn = record_sn; goto exit_rcu_unlock;
}
record_sn++;
}
info = NULL;
if (is_async) { /* shouldn't get to wraparound: * too long in async stage, something bad happened
*/ if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) returnfalse;
/* asynchronous stage: log all headers seq such that * req_seq <= seq <= end_seq, and wait for real resync request
*/ if (before(*seq, req_seq)) returnfalse; if (!after(*seq, req_end) &&
resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX)
resync_async->log[resync_async->loglen++] = *seq;
resync_async->rcd_delta++;
returnfalse;
}
/* synchronous stage: check against the logged entries and * proceed to check the next entries if no match was found
*/ for (i = 0; i < resync_async->loglen; i++) if (req_seq == resync_async->log[i] &&
atomic64_try_cmpxchg(&resync_async->req, &resync_req, 0)) {
*rcd_delta = resync_async->rcd_delta - i;
*seq = req_seq;
resync_async->loglen = 0;
resync_async->rcd_delta = 0; returntrue;
}
if (likely(!is_req_pending) || req_seq != seq ||
!atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) return; break; case TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT: if (likely(!rx_ctx->resync_nh_do_now)) return;
/* head of next rec is already in, note that the sock_inq will * include the currently parsed message when called from parser
*/
sock_data = tcp_inq(sk); if (sock_data > rcd_len) {
trace_tls_device_rx_resync_nh_delay(sk, sock_data,
rcd_len); return;
}
rx_ctx->resync_nh_do_now = 0;
seq += rcd_len;
tls_bigint_increment(rcd_sn, prot->rec_seq_size); break; case TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC:
resync_req = atomic64_read(&rx_ctx->resync_async->req);
is_req_pending = resync_req; if (likely(!is_req_pending)) return;
if (!tls_device_rx_resync_async(rx_ctx->resync_async,
resync_req, &seq, &rcd_delta)) return;
tls_bigint_subtract(rcd_sn, rcd_delta); break;
}
/* device will request resyncs by itself based on stream scan */ if (ctx->resync_type != TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT) return; /* already scheduled */ if (ctx->resync_nh_do_now) return; /* seen decrypted fragments since last fully-failed record */ if (ctx->resync_nh_reset) {
ctx->resync_nh_reset = 0;
ctx->resync_nh.decrypted_failed = 1;
ctx->resync_nh.decrypted_tgt = TLS_DEVICE_RESYNC_NH_START_IVAL; return;
}
if (++ctx->resync_nh.decrypted_failed <= ctx->resync_nh.decrypted_tgt) return;
/* doing resync, bump the next target in case it fails */ if (ctx->resync_nh.decrypted_tgt < TLS_DEVICE_RESYNC_NH_MAX_IVAL)
ctx->resync_nh.decrypted_tgt *= 2; else
ctx->resync_nh.decrypted_tgt += TLS_DEVICE_RESYNC_NH_MAX_IVAL;
rxm = strp_msg(skb);
/* head of next rec is already in, parser will sync for us */ if (tcp_inq(sk) > rxm->full_len) {
trace_tls_device_rx_resync_nh_schedule(sk);
ctx->resync_nh_do_now = 1;
} else { struct tls_prot_info *prot = &tls_ctx->prot_info;
u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
if (skb->decrypted) {
err = skb_store_bits(skb, offset, buf, copy); if (err) goto free_buf;
}
offset += copy;
buf += copy;
}
pos = skb_pagelen(skb);
skb_walk_frags(skb, skb_iter) { int frag_pos;
/* Practically all frags must belong to msg if reencrypt * is needed with current strparser and coalescing logic, * but strparser may "get optimized", so let's be safe.
*/ if (pos + skb_iter->len <= offset) goto done_with_frag; if (pos >= data_len + rxm->offset) break;
if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) { if (likely(is_encrypted || is_decrypted)) return is_decrypted;
/* After tls_device_down disables the offload, the next SKB will * likely have initial fragments decrypted, and final ones not * decrypted. We need to reencrypt that single SKB.
*/ return tls_device_reencrypt(sk, tls_ctx);
}
/* Return immediately if the record is either entirely plaintext or * entirely ciphertext. Otherwise handle reencrypt partially decrypted * record.
*/ if (is_decrypted) {
ctx->resync_nh_reset = 1; return is_decrypted;
} if (is_encrypted) {
tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb); return 0;
}
/* start at rec_seq - 1 to account for the start marker record */
memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn));
offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1;
/* TLS offload is greatly simplified if we don't send * SKBs where only part of the payload needs to be encrypted. * So mark the last skb in the write queue as end of record.
*/
tcp_write_collapse_fence(sk);
/* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event * * device_offload_lock is taken in tls_devices's NETDEV_DOWN * handler thus protecting from the device going down before * ctx was added to tls_device_list.
*/
down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) {
rc = -EINVAL; goto release_lock;
}
/* following this assignment tls_is_skb_tx_device_offloaded * will return true and the context might be accessed * by the netdev's xmit function.
*/
smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb);
dev_put(netdev);
/* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event * * device_offload_lock is taken in tls_devices's NETDEV_DOWN * handler thus protecting from the device going down before * ctx was added to tls_device_list.
*/
down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) {
rc = -EINVAL; goto release_lock;
}
/* Sync with inflight packets. After this point: * TX: no non-encrypted packets will be passed to the driver. * RX: resync requests from the driver will be ignored.
*/
synchronize_net();
/* Release the offload context on the driver side. */ if (ctx->tx_conf == TLS_HW)
netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
TLS_OFFLOAD_CTX_DIR_TX); if (ctx->rx_conf == TLS_HW &&
!test_bit(TLS_RX_DEV_CLOSED, &ctx->flags))
netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
TLS_OFFLOAD_CTX_DIR_RX);
dev_put(netdev);
/* Move the context to a separate list for two reasons: * 1. When the context is deallocated, list_del is called. * 2. It's no longer an offloaded context, so we don't want to * run offload-specific code on this context.
*/
spin_lock_irqsave(&tls_device_lock, flags);
list_move_tail(&ctx->list, &tls_device_down_list);
spin_unlock_irqrestore(&tls_device_lock, flags);
/* Device contexts for RX and TX will be freed in on sk_destruct * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. * Now release the ref taken above.
*/ if (refcount_dec_and_test(&ctx->refcount)) { /* sk_destruct ran after tls_device_down took a ref, and * it returned early. Complete the destruction here.
*/
list_del(&ctx->list);
tls_device_free_ctx(ctx);
}
}
if (!dev->tlsdev_ops &&
!(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX))) return NOTIFY_DONE;
switch (event) { case NETDEV_REGISTER: case NETDEV_FEAT_CHANGE: if (netif_is_bond_master(dev)) return NOTIFY_DONE; if ((dev->features & NETIF_F_HW_TLS_RX) &&
!dev->tlsdev_ops->tls_dev_resync) return NOTIFY_BAD;
if (dev->tlsdev_ops &&
dev->tlsdev_ops->tls_dev_add &&
dev->tlsdev_ops->tls_dev_del) return NOTIFY_DONE; else return NOTIFY_BAD; case NETDEV_DOWN: return tls_device_down(dev);
} return NOTIFY_DONE;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.