#include <net/ipv6.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_skb_utils.h"
#include "qosify-bpf.h"
#define INET_ECN_MASK 3
/*
 * Per-flow tracking state stored in flow_map, keyed by skb hash.
 * All fields are __u32 so the struct needs no packing; pkt_count may
 * exceed 16 bits now that GSO segment counts are accumulated.
 */
struct flow_bucket {
	__u32 last_update;	/* cur_time() stamp of last interval start; 0 = unused */
	__u32 pkt_len_avg;	/* EWMA of per-segment packet length */
	__u32 pkt_count;	/* packets (GSO segments) seen in current interval */
	__u32 bulk_timeout;	/* intervals left before bulk marking expires */
};
/*
 * Global configuration, a single-entry array written by userspace.
 * Key/value types added: a BTF-defined map cannot be loaded without
 * them, and get_config() looks it up with a __u32 key and returns a
 * struct qosify_config pointer.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, __u32);
	__type(value, struct qosify_config);
	__uint(max_entries, 1);
} config SEC(".maps");
-typedef struct {
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(pinning, 1);
+ __type(key, __u32);
+ __type(value, __u8);
+ __uint(max_entries, 1 << 16);
+} tcp_ports SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(pinning, 1);
__type(key, __u32);
- __type(value, struct qosify_dscp_val);
+ __type(value, __u8);
__uint(max_entries, 1 << 16);
-} port_array_t;
+} udp_ports SEC(".maps");
/* LRU state table for bulk/priority flow detection, keyed by skb hash. */
struct {
	__uint(type, BPF_MAP_TYPE_LRU_HASH);
	__uint(pinning, 1);
	__type(key, __u32);
	__type(value, struct flow_bucket);
	__uint(max_entries, QOSIFY_FLOW_BUCKETS);
} flow_map SEC(".maps");
/*
 * Per-host state for IPv6 addresses.
 * NOTE(review): this declaration has no __type(key)/__type(value) and no
 * max_entries — libbpf will refuse to load a hash map without them.
 * parse_ipv6() keys it by &iph->saddr/&iph->daddr (struct in6_addr) and
 * classify() reads a struct qosify_ip_map_val from it; the declaration
 * here looks truncated — confirm against the full file.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(pinning, 1);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} ipv6_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(pinning, 1);
+ __type(key, __u32);
+ __type(value, struct qosify_class);
+ __uint(max_entries, QOSIFY_MAX_CLASS_ENTRIES +
+ QOSIFY_DEFAULT_CLASS_ENTRIES);
+} class_map SEC(".maps");
+
/* Fetch the singleton qosify_config entry (index 0) from the config map. */
static struct qosify_config *get_config(void)
{
	__u32 idx = 0;

	return bpf_map_lookup_elem(&config, &idx);
}
/*
 * The generic packet-access helpers that used to live here
 * (proto_is_vlan, proto_is_ip, skb_ptr, skb_end_ptr, skb_check)
 * were moved to the shared bpf_skb_utils.h header.
 */
/*
 * Coarse timestamp (~16.8 ms units) derived from ktime.
 * Fixes a missing return: the original computed val and fell off the end
 * of a non-void function (undefined behavior). The result is forced
 * non-zero because check_flow_bulk() uses last_update == 0 to mean
 * "bucket never initialized".
 */
static __always_inline __u32 cur_time(void)
{
	__u32 val = bpf_ktime_get_ns() >> 24;

	if (!val)
		val = 1;

	return val;
}
static __always_inline void
-ipv4_change_dsfield(struct iphdr *iph, __u8 mask, __u8 value, bool force)
+ipv4_change_dsfield(struct __sk_buff *skb, __u32 offset,
+ __u8 mask, __u8 value, bool force)
{
- __u32 check = bpf_ntohs(iph->check);
+ struct iphdr *iph;
+ __u32 check;
__u8 dsfield;
+ iph = skb_ptr(skb, offset, sizeof(*iph));
+ if (!iph)
+ return;
+
+ check = bpf_ntohs(iph->check);
if ((iph->tos & mask) && !force)
return;
}
static __always_inline void
-ipv6_change_dsfield(struct ipv6hdr *ipv6h, __u8 mask, __u8 value, bool force)
+ipv6_change_dsfield(struct __sk_buff *skb, __u32 offset,
+ __u8 mask, __u8 value, bool force)
{
- __u16 *p = (__u16 *)ipv6h;
+ struct ipv6hdr *ipv6h;
+ __u16 *p;
__u16 val;
+ ipv6h = skb_ptr(skb, offset, sizeof(*ipv6h));
+ if (!ipv6h)
+ return;
+
+ p = (__u16 *)ipv6h;
if (((*p >> 4) & mask) && !force)
return;
*p = val;
}
/*
 * Ethernet + VLAN header parsing moved to bpf_skb_utils.h
 * (skb_parse_ethernet / skb_parse_vlan); see classify() below.
 */
static void
-parse_l4proto(struct qosify_config *config, struct __sk_buff *skb,
- __u32 offset, __u8 proto, bool ingress,
- struct qosify_dscp_val *out_val)
+parse_l4proto(struct qosify_config *config, struct skb_parser_info *info,
+ bool ingress, __u8 *out_val)
{
- struct qosify_dscp_val *value;
struct udphdr *udp;
__u32 src, dest, key;
+ __u8 *value;
+ __u8 proto = info->proto;
- udp = skb_ptr(skb, offset);
- if (skb_check(skb, &udp->len))
+ udp = skb_info_ptr(info, sizeof(*udp));
+ if (!udp)
return;
if (config && (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)) {
return;
}
+ src = READ_ONCE(udp->source);
+ dest = READ_ONCE(udp->dest);
if (ingress)
- key = udp->source;
+ key = src;
else
- key = udp->dest;
+ key = dest;
if (proto == IPPROTO_TCP) {
value = bpf_map_lookup_elem(&tcp_ports, &key);
*out_val = *value;
}
-static __always_inline void
-check_flow_bulk(struct qosify_config *config, struct __sk_buff *skb,
- struct flow_bucket *flow, struct qosify_dscp_val *out_val)
+static __always_inline bool
+check_flow_bulk(struct qosify_flow_config *config, struct __sk_buff *skb,
+ struct flow_bucket *flow, __u8 *out_val)
{
bool trigger = false;
__s32 delta;
__u32 time;
+ int segs = 1;
+ bool ret = false;
if (!config->bulk_trigger_pps)
- return;
+ return false;
+ time = cur_time();
if (!flow->last_update)
goto reset;
- time = cur_time();
delta = time - flow->last_update;
if ((u32)delta > FLOW_TIMEOUT)
goto reset;
- if (flow->pkt_count < 0xffff)
- flow->pkt_count++;
-
+ if (skb->gso_segs)
+ segs = skb->gso_segs;
+ flow->pkt_count += segs;
if (flow->pkt_count > config->bulk_trigger_pps) {
- flow->val = config->dscp_bulk;
- flow->val.flags = QOSIFY_VAL_FLAG_BULK_CHECK;
flow->bulk_timeout = config->bulk_trigger_timeout + 1;
trigger = true;
}
if (delta >= FLOW_CHECK_INTERVAL) {
- if (flow->bulk_timeout && !trigger) {
+ if (flow->bulk_timeout && !trigger)
flow->bulk_timeout--;
- if (!flow->bulk_timeout)
- flow->val.flags = 0;
- }
goto clear;
}
- return;
+ goto out;
reset:
- flow->val.flags = 0;
flow->pkt_len_avg = 0;
clear:
flow->pkt_count = 1;
flow->last_update = time;
+out:
+ if (flow->bulk_timeout) {
+ *out_val = config->dscp_bulk;
+ return true;
+ }
+
+ return false;
}
-static __always_inline void
-check_flow_prio(struct qosify_config *config, struct __sk_buff *skb,
- struct flow_bucket *flow, struct qosify_dscp_val *out_val)
+static __always_inline bool
+check_flow_prio(struct qosify_flow_config *config, struct __sk_buff *skb,
+ struct flow_bucket *flow, __u8 *out_val)
{
- if ((flow->val.flags & QOSIFY_VAL_FLAG_BULK_CHECK) ||
- !config->prio_max_avg_pkt_len)
- return;
+ int cur_len = skb->len;
- if (ewma(&flow->pkt_len_avg, skb->len) > config->prio_max_avg_pkt_len) {
- flow->val.flags = 0;
- return;
+ if (flow->bulk_timeout)
+ return false;
+
+ if (!config->prio_max_avg_pkt_len)
+ return false;
+
+ if (skb->gso_segs > 1)
+ cur_len /= skb->gso_segs;
+
+ if (ewma(&flow->pkt_len_avg, cur_len) <= config->prio_max_avg_pkt_len) {
+ *out_val = config->dscp_prio;
+ return true;
}
- flow->val = config->dscp_prio;
- flow->val.flags = QOSIFY_VAL_FLAG_PRIO_CHECK;
+ return false;
}
-static __always_inline void
-check_flow(struct qosify_config *config, struct __sk_buff *skb,
- struct qosify_dscp_val *out_val)
+static __always_inline bool
+check_flow(struct qosify_flow_config *config, struct __sk_buff *skb,
+ __u8 *out_val)
{
struct flow_bucket flow_data;
struct flow_bucket *flow;
__u32 hash;
-
- if (!(out_val->flags & (QOSIFY_VAL_FLAG_PRIO_CHECK |
- QOSIFY_VAL_FLAG_BULK_CHECK)))
- return;
+ bool ret = false;
if (!config)
- return;
+ return false;
+
+ if (!config->prio_max_avg_pkt_len && !config->bulk_trigger_pps)
+ return false;
hash = bpf_get_hash_recalc(skb);
flow = bpf_map_lookup_elem(&flow_map, &hash);
bpf_map_update_elem(&flow_map, &hash, &flow_data, BPF_ANY);
flow = bpf_map_lookup_elem(&flow_map, &hash);
if (!flow)
- return;
+ return false;
}
+ ret |= check_flow_bulk(config, skb, flow, out_val);
+ ret |= check_flow_prio(config, skb, flow, out_val);
- if (out_val->flags & QOSIFY_VAL_FLAG_BULK_CHECK)
- check_flow_bulk(config, skb, flow, out_val);
- if (out_val->flags & QOSIFY_VAL_FLAG_PRIO_CHECK)
- check_flow_prio(config, skb, flow, out_val);
-
- if (flow->val.flags & out_val->flags)
- *out_val = flow->val;
+ return ret;
}
static __always_inline struct qosify_ip_map_val *
-parse_ipv4(struct qosify_config *config, struct __sk_buff *skb, __u32 *offset,
- bool ingress, struct qosify_dscp_val *out_val)
+parse_ipv4(struct qosify_config *config, struct skb_parser_info *info,
+ bool ingress, __u8 *out_val)
{
- struct qosify_dscp_val *value;
struct iphdr *iph;
__u8 ipproto;
int hdr_len;
void *key;
- iph = skb_ptr(skb, *offset);
- if (skb_check(skb, iph + 1))
- return NULL;
-
- hdr_len = iph->ihl * 4;
- if (bpf_skb_pull_data(skb, *offset + hdr_len + sizeof(struct udphdr)))
- return NULL;
-
- iph = skb_ptr(skb, *offset);
- *offset += hdr_len;
-
- if (skb_check(skb, (void *)(iph + 1)))
+ iph = skb_parse_ipv4(info, sizeof(struct udphdr));
+ if (!iph)
return NULL;
- ipproto = iph->protocol;
- parse_l4proto(config, skb, *offset, ipproto, ingress, out_val);
+ parse_l4proto(config, info, ingress, out_val);
if (ingress)
key = &iph->saddr;
}
static __always_inline struct qosify_ip_map_val *
-parse_ipv6(struct qosify_config *config, struct __sk_buff *skb, __u32 *offset,
- bool ingress, struct qosify_dscp_val *out_val)
+parse_ipv6(struct qosify_config *config, struct skb_parser_info *info,
+ bool ingress, __u8 *out_val)
{
- struct qosify_dscp_val *value;
struct ipv6hdr *iph;
__u8 ipproto;
void *key;
- if (bpf_skb_pull_data(skb, *offset + sizeof(*iph) + sizeof(struct udphdr)))
- return NULL;
-
- iph = skb_ptr(skb, *offset);
- *offset += sizeof(*iph);
-
- if (skb_check(skb, (void *)(iph + 1)))
+ iph = skb_parse_ipv6(info, sizeof(struct udphdr));
+ if (!iph)
return NULL;
- ipproto = iph->nexthdr;
if (ingress)
key = &iph->saddr;
else
key = &iph->daddr;
- parse_l4proto(config, skb, *offset, ipproto, ingress, out_val);
+ parse_l4proto(config, info, ingress, out_val);
return bpf_map_lookup_elem(&ipv6_map, key);
}
-SEC("classifier")
+static __always_inline int
+dscp_lookup_class(uint8_t *dscp, bool ingress, struct qosify_class **out_class,
+ bool counter)
+{
+ struct qosify_class *class;
+ __u8 fallback_flag;
+ __u32 key;
+
+ if (!(*dscp & QOSIFY_DSCP_CLASS_FLAG))
+ return 0;
+
+ fallback_flag = *dscp & QOSIFY_DSCP_FALLBACK_FLAG;
+ key = *dscp & QOSIFY_DSCP_VALUE_MASK;
+ class = bpf_map_lookup_elem(&class_map, &key);
+ if (!class)
+ return -1;
+
+ if (!(class->flags & QOSIFY_CLASS_FLAG_PRESENT))
+ return -1;
+
+ if (counter)
+ class->packets++;
+ *dscp = dscp_val(&class->val, ingress);
+ *dscp |= fallback_flag;
+ *out_class = class;
+
+ return 0;
+}
+
+SEC("tc")
int classify(struct __sk_buff *skb)
{
+ struct skb_parser_info info;
bool ingress = module_flags & QOSIFY_INGRESS;
struct qosify_config *config;
+ struct qosify_class *class = NULL;
struct qosify_ip_map_val *ip_val;
- struct qosify_dscp_val val = {
- .ingress = 0xff,
- .egress = 0xff,
- .flags = 0,
- };
- __u32 offset = 0;
__u32 iph_offset;
+ __u8 dscp = 0;
void *iph;
- __u8 dscp;
bool force;
int type;
config = get_config();
+ if (!config)
+ return TC_ACT_UNSPEC;
+
+ skb_parse_init(&info, skb);
+ if (module_flags & QOSIFY_IP_ONLY) {
+ type = info.proto = skb->protocol;
+ } else if (skb_parse_ethernet(&info)) {
+ skb_parse_vlan(&info);
+ skb_parse_vlan(&info);
+ type = info.proto;
+ } else {
+ return TC_ACT_UNSPEC;
+ }
- if (module_flags & QOSIFY_IP_ONLY)
- type = skb->protocol;
- else
- type = parse_ethernet(skb, &offset);
-
- iph_offset = offset;
+ iph_offset = info.offset;
if (type == bpf_htons(ETH_P_IP))
- ip_val = parse_ipv4(config, skb, &offset, ingress, &val);
+ ip_val = parse_ipv4(config, &info, ingress, &dscp);
else if (type == bpf_htons(ETH_P_IPV6))
- ip_val = parse_ipv6(config, skb, &offset, ingress, &val);
+ ip_val = parse_ipv6(config, &info, ingress, &dscp);
else
- return TC_ACT_OK;
+ return TC_ACT_UNSPEC;
if (ip_val) {
if (!ip_val->seen)
ip_val->seen = 1;
- val = ip_val->dscp;
+ dscp = ip_val->dscp;
}
- check_flow(config, skb, &val);
+ if (dscp_lookup_class(&dscp, ingress, &class, true))
+ return TC_ACT_UNSPEC;
- dscp = dscp_val(&val, ingress);
- if (dscp == 0xff)
- return TC_ACT_OK;
+ if (class) {
+ if (check_flow(&class->config, skb, &dscp) &&
+ dscp_lookup_class(&dscp, ingress, &class, false))
+ return TC_ACT_UNSPEC;
+ }
dscp &= GENMASK(5, 0);
dscp <<= 2;
force = !(dscp & QOSIFY_DSCP_FALLBACK_FLAG);
- iph = skb_ptr(skb, iph_offset);
- if (skb_check(skb, (void *)iph + sizeof(struct ipv6hdr)))
- return TC_ACT_OK;
-
if (type == bpf_htons(ETH_P_IP))
- ipv4_change_dsfield(iph, INET_ECN_MASK, dscp, force);
+ ipv4_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
else if (type == bpf_htons(ETH_P_IPV6))
- ipv6_change_dsfield(iph, INET_ECN_MASK, dscp, force);
+ ipv6_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
- return TC_ACT_OK;
+ return TC_ACT_UNSPEC;
}
char _license[] SEC("license") = "GPL";