/* Organization of SPD aka "XFRM rules" ------------------------------------
Basic objects: - policy rule, struct xfrm_policy (=SPD entry) - bundle of transformations, struct dst_entry == struct xfrm_dst (=SA bundle) - instance of a transformer, struct xfrm_state (=SA) - template to clone xfrm_state, struct xfrm_tmpl
SPD is organized as hash table (for policies that meet minimum address prefix length setting, net->xfrm.policy_hthresh). Other policies are stored in lists, sorted into rbtree ordered by destination and source address networks. See net/xfrm/xfrm_policy.c for details.
(To be compatible with existing pfkeyv2 implementations, many rules with priority of 0x7fffffff are allowed to exist and such rules are ordered in an unpredictable way, thanks to bsd folks.)
If "action" is "block", then we prohibit the flow, otherwise: if "xfrms_nr" is zero, the flow passes untransformed. Otherwise, policy entry has list of up to XFRM_MAX_DEPTH transformations, described by templates xfrm_tmpl. Each template is resolved to a complete xfrm_state (see below) and we pack bundle of transformations to a dst_entry returned to requester.
   Resolution of xfrm_tmpl
   -----------------------
   Template contains:
   1. ->mode		Mode: transport or tunnel
   2. ->id.proto	Protocol: AH/ESP/IPCOMP
   3. ->id.daddr	Remote tunnel endpoint, ignored for transport mode.
			Q: allow to resolve security gateway?
   4. ->id.spi		If not zero, static SPI.
   5. ->saddr		Local tunnel endpoint, ignored for transport mode.
   6. ->algos		List of allowed algos. Plain bitmask now.
			Q: ealgos, aalgos, calgos. What a mess...
   7. ->share		Sharing mode.
			Q: how to implement private sharing mode?
			   To add struct sock* to flow id?
Having this template we search through SAD searching for entries with appropriate mode/proto/algo, permitted by selector. If no appropriate entry found, it is requested from key manager.
PROBLEMS: Q: How to find all the bundles referring to a physical path for PMTU discovery? Seems, dst should contain list of all parents... and enter to infinite locking hierarchy disaster. No! It is easier, we will not search for them, let them find us. We add genid to each dst plus pointer to genid of raw IP route, pmtu disc will update pmtu on raw IP route and increase its genid. dst_check() will see this for top level and trigger resyncing metrics. Plus, it will be made via sk->sk_dst_cache. Solved.
*/
/* Per-state hardware offload bookkeeping, embedded in struct xfrm_state. */
struct xfrm_dev_offload {
	/* The device for this offload.
	 * Device drivers should not use this directly, as that will prevent
	 * them from working with bonding device. Instead, the device passed
	 * to the add/delete callbacks should be used.
	 */
	struct net_device	*dev;
	netdevice_tracker	dev_tracker;
	/* This is a private pointer used by the bonding driver (and eventually
	 * should be moved there). Device drivers should not use it.
	 * Protected by xfrm_state.lock AND bond.ipsec_lock in most cases,
	 * except in the .xdo_dev_state_del() flow, where only xfrm_state.lock
	 * is held.
	 */
	struct net_device	*real_dev;
	unsigned long		offload_handle;
	u8			dir : 2;
	u8			type : 2;
	u8			flags : 2;
};
/* used to fix curlft->add_time when changing date */ long saved_tmo;
/* Last used time */
time64_t lastused;
struct page_frag xfrag;
/* Reference to data common to all the instances of this
* transformer. */ conststruct xfrm_type *type; struct xfrm_mode inner_mode; struct xfrm_mode inner_mode_iaf; struct xfrm_mode outer_mode;
/** * struct xfrm_mode_cbs - XFRM mode callbacks * @owner: module owner or NULL * @init_state: Add/init mode specific state in `xfrm_state *x` * @clone_state: Copy mode specific values from `orig` to new state `x` * @destroy_state: Cleanup mode specific state from `xfrm_state *x` * @user_init: Process mode specific netlink attributes from user * @copy_to_user: Add netlink attributes to `attrs` based on state in `x` * @sa_len: Return space required to store mode specific netlink attributes * @get_inner_mtu: Return avail payload space after removing encap overhead * @input: Process received packet from SA using mode * @output: Output given packet using mode * @prepare_output: Add mode specific encapsulation to packet in skb. On return * `transport_header` should point at ESP header, `network_header` should * point at outer IP header and `mac_header` should opint at the * protocol/nexthdr field of the outer IP. * * One should examine and understand the specific uses of these callbacks in * xfrm for further detail on how and when these functions are called. RTSL.
*/ struct xfrm_mode_cbs { struct module *owner; int (*init_state)(struct xfrm_state *x); int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig); void (*destroy_state)(struct xfrm_state *x); int (*user_init)(struct net *net, struct xfrm_state *x, struct nlattr **attrs, struct netlink_ext_ack *extack); int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb); unsignedint (*sa_len)(conststruct xfrm_state *x);
u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu); int (*input)(struct xfrm_state *x, struct sk_buff *skb); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
};
/* Register/unregister the callback set implementing an xfrm mode. */
int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
void xfrm_unregister_mode_cbs(u8 mode);
/* Map an address family to the IP protocol number used for IP-in-IP
 * tunneling with that family: IPPROTO_IPIP for AF_INET, IPPROTO_IPV6
 * for AF_INET6, 0 for anything else.
 */
static inline int xfrm_af2proto(unsigned int family)
{
	switch (family) {
	case AF_INET:
		return IPPROTO_IPIP;
	case AF_INET6:
		return IPPROTO_IPV6;
	default:
		return 0;
	}
}
/* NOTE(review): struct xfrm_tmpl appears truncated by the extraction — the
 * trailing algorithm-mask fields and the closing brace are missing, and some
 * tokens were fused (e.g. "unsignedshort"); left byte-identical rather than
 * guessing at the lost tail.
 */
struct xfrm_tmpl { /* id in template is interpreted as: * daddr - destination of tunnel, may be zero for transport mode. * spi - zero to acquire spi. Not zero if spi is static, then * daddr must be fixed too. * proto - AH/ESP/IPCOMP
*/ struct xfrm_id id;
/* Source address of tunnel. Ignored, if it is not a tunnel. */
xfrm_address_t saddr;
unsignedshort encap_family;
u32 reqid;
/* Mode: transport, tunnel etc. */
u8 mode;
/* Sharing mode: unique, this session only, this user only etc. */
u8 share;
/* May skip this transformation if no SA is found */
u8 optional;
/** * struct xfrm_policy - xfrm policy * @xp_net: network namespace the policy lives in * @bydst: hlist node for SPD hash table or rbtree list * @byidx: hlist node for index hash table * @state_cache_list: hlist head for policy cached xfrm states * @lock: serialize changes to policy structure members * @refcnt: reference count, freed once it reaches 0 * @pos: kernel internal tie-breaker to determine age of policy * @timer: timer * @genid: generation, used to invalidate old policies * @priority: priority, set by userspace * @index: policy index (autogenerated) * @if_id: virtual xfrm interface id * @mark: packet mark * @selector: selector * @lft: liftime configuration data * @curlft: liftime state * @walk: list head on pernet policy list * @polq: queue to hold packets while aqcuire operaion in progress * @bydst_reinsert: policy tree node needs to be merged * @type: XFRM_POLICY_TYPE_MAIN or _SUB * @action: XFRM_POLICY_ALLOW or _BLOCK * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP * @xfrm_nr: number of used templates in @xfrm_vec * @family: protocol family * @security: SELinux security label * @xfrm_vec: array of templates to resolve state * @rcu: rcu head, used to defer memory release * @xdo: hardware offload state
*/ struct xfrm_policy {
possible_net_t xp_net; struct hlist_node bydst; struct hlist_node byidx;
struct hlist_head state_cache_list;
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;
u32 pos; struct timer_list timer;
/* This structure is used for the duration where packets are being
 * transformed by IPsec. As soon as the packet leaves IPsec the
 * area beyond the generic IP part may be overwritten.
 */
struct xfrm_skb_cb {
	struct xfrm_tunnel_skb_cb header;

	/* Sequence number for replay protection. */
	union {
		struct {
			__u32 low;
			__u32 hi;
		} output;
		struct {
			__be32 low;
			__be32 hi;
		} input;
	} seq;
};
/* This structure is used by the afinfo prepare_input/prepare_output functions
 * to transmit header information to the mode input/output functions.
 */
struct xfrm_mode_skb_cb {
	struct xfrm_tunnel_skb_cb header;

	/* Copied from header for IPv4, always set to zero and DF for IPv6. */
	__be16 id;
	__be16 frag_off;

	/* IP header length (excluding options or extension headers). */
	u8 ihl;

	/* TOS for IPv4, class for IPv6. */
	u8 tos;

	/* TTL for IPv4, hop limit for IPv6. */
	u8 ttl;

	/* Protocol for IPv4, NH for IPv6. */
	u8 protocol;

	/* Option length for IPv4, zero for IPv6. */
	u8 optlen;

	/* Used by IPv6 only, zero for IPv4. */
	u8 flow_lbl[3];
};
/* * This structure is used by the input processing to locate the SPI and * related information.
*/ struct xfrm_spi_skb_cb { struct xfrm_tunnel_skb_cb header;
static __inline__
__be16 xfrm_flowi_sport(conststruct flowi *fl, constunion flowi_uli *uli)
{
__be16 port; switch(fl->flowi_proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: case IPPROTO_SCTP:
port = uli->ports.sport; break; case IPPROTO_ICMP: case IPPROTO_ICMPV6:
port = htons(uli->icmpt.type); break; case IPPROTO_MH:
port = htons(uli->mht.type); break; case IPPROTO_GRE:
port = htons(ntohl(uli->gre_key) >> 16); break; default:
port = 0; /*XXX*/
} return port;
}
static __inline__
__be16 xfrm_flowi_dport(conststruct flowi *fl, constunion flowi_uli *uli)
{
__be16 port; switch(fl->flowi_proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: case IPPROTO_SCTP:
port = uli->ports.dport; break; case IPPROTO_ICMP: case IPPROTO_ICMPV6:
port = htons(uli->icmpt.code); break; case IPPROTO_GRE:
port = htons(ntohl(uli->gre_key) & 0xffff); break; default:
port = 0; /*XXX*/
} return port;
}
#ifdef CONFIG_SECURITY_NETWORK_XFRM /* If neither has a context --> match * Otherwise, both must have a context and the sids, doi, alg must match
*/ staticinlinebool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
{ return ((!s1 && !s2) ||
(s1 && s2 &&
(s1->ctx_sid == s2->ctx_sid) &&
(s1->ctx_doi == s2->ctx_doi) &&
(s1->ctx_alg == s2->ctx_alg)));
} #else staticinlinebool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
{ returntrue;
} #endif
/* A struct encoding bundle of transformations to apply to some set of flow.
 *
 * xdst->child points to the next element of bundle.
 * dst->xfrm points to an instance of transformer.
 *
 * Due to unfortunate limitations of current routing cache, which we
 * have no time to fix, it mirrors struct rtable and bound to the same
 * routing key, including saddr,daddr. However, we can have many of
 * bundles differing by session id. All the bundles grow from a parent
 * policy rule.
 */
struct xfrm_dst {
	union {
		struct dst_entry	dst;
		struct rtable		rt;
		struct rt6_info		rt6;
	} u;
	struct dst_entry *route;
	struct dst_entry *child;
	struct dst_entry *path;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int num_pols, num_xfrms;
	u32 xfrm_genid;
	u32 policy_genid;
	u32 route_mtu_cached;
	u32 child_mtu_cached;
	u32 route_cookie;
	u32 path_cookie;
};
#ifdef CONFIG_XFRM
int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
			unsigned short family);

/* True when no policy exists for @dir in @net, the skb carries no secpath,
 * and the namespace-wide default for @dir is to accept such traffic.
 */
static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb,
					 int dir)
{
	if (!net->xfrm.policy_count[dir] && !secpath_exists(skb))
		return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT;

	return false;
}
staticinlinebool __xfrm_check_dev_nopolicy(struct sk_buff *skb, int dir, unsignedshort family)
{ if (dir != XFRM_POLICY_OUT && family == AF_INET) { /* same dst may be used for traffic originating from * devices with different policy settings.
*/ return IPCB(skb)->flags & IPSKB_NOPOLICY;
} return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY);
}
/* NOTE(review): this function appears truncated by the extraction — the
 * final fall-through policy-check return path and the closing brace are
 * missing; left byte-identical rather than guessing at the lost tail.
 */
staticinlineint __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, unsignedint family, int reverse)
{ struct net *net = dev_net(skb->dev); int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0); struct xfrm_offload *xo = xfrm_offload(skb); struct xfrm_state *x;
if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family);
if (xo) {
x = xfrm_input_state(skb); if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) { bool check = (xo->flags & CRYPTO_DONE) &&
(xo->status & CRYPTO_SUCCESS);
/* The packets here are plain ones and secpath was * needed to indicate that hardware already handled * them and there is no need to do nothing in addition. * * Consume secpath which was set by drivers.
*/
secpath_reset(skb); return check;
}
}
staticinlinebool xfrm_id_proto_valid(u8 proto)
{ switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: case IPPROTO_COMP: #if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: #endif returntrue; default: returnfalse;
}
}
/* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. */
static inline int xfrm_id_proto_match(u8 proto, u8 userproto)
{
	return (!userproto || proto == userproto ||
		(userproto == IPSEC_PROTO_ANY && (proto == IPPROTO_AH ||
						  proto == IPPROTO_ESP ||
						  proto == IPPROTO_COMP)));
}
/* XFRM protocol handlers. */
struct xfrm4_protocol {
	int (*handler)(struct sk_buff *skb);
	int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi,
			     int encap_type);
	int (*cb_handler)(struct sk_buff *skb, int err);
	int (*err_handler)(struct sk_buff *skb, u32 info);

	struct xfrm4_protocol __rcu *next;
	int priority;
};
/* IPv6 counterpart of struct xfrm4_protocol. */
struct xfrm6_protocol {
	int (*handler)(struct sk_buff *skb);
	int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi,
			     int encap_type);
	int (*cb_handler)(struct sk_buff *skb, int err);
	int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
			   u8 type, u8 code, int offset, __be32 info);

	struct xfrm6_protocol __rcu *next;
	int priority;
};
/* XFRM tunnel handlers. */
struct xfrm_tunnel {
	int (*handler)(struct sk_buff *skb);
	int (*cb_handler)(struct sk_buff *skb, int err);
	int (*err_handler)(struct sk_buff *skb, u32 info);

	struct xfrm_tunnel __rcu *next;
	int priority;
};
/* IPv6 counterpart of struct xfrm_tunnel. */
struct xfrm6_tunnel {
	int (*handler)(struct sk_buff *skb);
	int (*cb_handler)(struct sk_buff *skb, int err);
	int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
			   u8 type, u8 code, int offset, __be32 info);

	struct xfrm6_tunnel __rcu *next;
	int priority;
};
void xfrm_init(void);
void xfrm4_init(void);
int xfrm_state_init(struct net *net);
void xfrm_state_fini(struct net *net);
void xfrm4_state_init(void);
void xfrm4_protocol_init(void);
#ifdef CONFIG_XFRM
int xfrm6_init(void);
void xfrm6_fini(void);
int xfrm6_state_init(void);
void xfrm6_state_fini(void);
int xfrm6_protocol_init(void);
void xfrm6_protocol_fini(void);
#else
/* No-op stubs when XFRM is compiled out. */
static inline int xfrm6_init(void)
{
	return 0;
}
static inline void xfrm6_fini(void)
{
	;
}
#endif
#ifdef CONFIG_XFRM_STATISTICS
int xfrm_proc_init(struct net *net);
void xfrm_proc_fini(struct net *net);
#endif

int xfrm_sysctl_init(struct net *net);
#ifdef CONFIG_SYSCTL
void xfrm_sysctl_fini(struct net *net);
#else
/* No-op stub when sysctl support is compiled out. */
static inline void xfrm_sysctl_fini(struct net *net)
{
}
#endif
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto, struct xfrm_address_filter *filter); int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net); struct xfrm_state *xfrm_state_alloc(struct net *net); void xfrm_state_free(struct xfrm_state *x); struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, conststruct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsignedshort family, u32 if_id); struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
xfrm_address_t *daddr,
xfrm_address_t *saddr, unsignedshort family,
u8 mode, u8 proto, u32 reqid); struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, unsignedshort family); int xfrm_state_check_expire(struct xfrm_state *x); void xfrm_state_update_stats(struct net *net); #ifdef CONFIG_XFRM_OFFLOAD staticinlinevoid xfrm_dev_state_update_stats(struct xfrm_state *x)
{ struct xfrm_dev_offload *xdo = &x->xso; struct net_device *dev = READ_ONCE(xdo->dev);
if (dev && dev->xfrmdev_ops &&
dev->xfrmdev_ops->xdo_dev_state_update_stats)
dev->xfrmdev_ops->xdo_dev_state_update_stats(x);
} #else staticinlinevoid xfrm_dev_state_update_stats(struct xfrm_state *x) {} #endif void xfrm_state_insert(struct xfrm_state *x); int xfrm_state_add(struct xfrm_state *x); int xfrm_state_update(struct xfrm_state *x); struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsignedshort family); struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr,
__be32 spi, u8 proto, unsignedshort family); struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr,
u8 proto, unsignedshort family); #ifdef CONFIG_XFRM_SUB_POLICY void xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsignedshort family); void xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, unsignedshort family); #else staticinlinevoid xfrm_tmpl_sort(struct xfrm_tmpl **d, struct xfrm_tmpl **s, int n, unsignedshort family)
{
}
staticinlinevoid xfrm_state_sort(struct xfrm_state **d, struct xfrm_state **s, int n, unsignedshort family)
{
} #endif
/* Snapshot of SAD (security association database) occupancy, filled in by
 * xfrm_sad_getinfo().
 */
struct xfrmk_sadinfo {
u32 sadhcnt; /* current hash bkts */
u32 sadhmcnt; /* max allowed hash bkts */
u32 sadcnt; /* current running count */
};
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack); int xfrm_init_state(struct xfrm_state *x); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, int (*finish)(struct net *, struct sock *, struct sk_buff *)); int xfrm_trans_queue(struct sk_buff *skb, int (*finish)(struct net *, struct sock *, struct sk_buff *)); int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); int xfrm4_tunnel_check_size(struct sk_buff *skb); #if IS_ENABLED(CONFIG_IPV6) int xfrm6_tunnel_check_size(struct sk_buff *skb); #else staticinlineint xfrm6_tunnel_check_size(struct sk_buff *skb)
{ return -EMSGSIZE;
} #endif
#if IS_ENABLED(CONFIG_NET_PKTGEN) int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); #endif
void xfrm_local_error(struct sk_buff *skb, int mtu); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm4_transport_finish(struct sk_buff *skb, int async); int xfrm4_rcv(struct sk_buff *skb);
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid); int km_report(struct net *net, u8 proto, struct xfrm_selector *sel,
xfrm_address_t *addr);
/* NOTE(review): struct xfrm_translator appears truncated by the extraction —
 * the closing brace (and possibly further members) are missing; left
 * byte-identical rather than guessing at the lost tail.
 */
struct xfrm_translator { /* Allocate frag_list and put compat translation there */ int (*alloc_compat)(struct sk_buff *skb, conststruct nlmsghdr *src);
/* Allocate nlmsg with 64-bit translation of received 32-bit message */ struct nlmsghdr *(*rcv_msg_compat)(conststruct nlmsghdr *nlh, int maxtype, conststruct nla_policy *policy, struct netlink_ext_ack *extack);
/* Translate 32-bit user_policy from sockptr */ int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen);
int xfrm_nat_keepalive_init(unsigned short family);
void xfrm_nat_keepalive_fini(unsigned short family);
int xfrm_nat_keepalive_net_init(struct net *net);
int xfrm_nat_keepalive_net_fini(struct net *net);
void xfrm_nat_keepalive_state_updated(struct xfrm_state *x);
#endif /* _NET_XFRM_H */
/*
 * NOTE(review): the text below is extraction residue (German web-page
 * boilerplate) appended after the include guard; preserved in translation
 * as a comment so the header remains valid C.
 *
 * Measurement V0.5
 * Processing duration: 0.21 seconds (preprocessed)
 * The information on this web page was compiled carefully to the best of
 * our knowledge. However, neither completeness, nor correctness, nor
 * quality of the provided information is guaranteed.
 * Note: the colored syntax display and the measurement are still
 * experimental.
 */