summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jakub Kicinski <kuba@kernel.org> 2025-02-21 16:08:54 -0800
committer: Jakub Kicinski <kuba@kernel.org> 2025-02-21 16:08:55 -0800
commit: 27422c373897b27e935159360bb35c4b26ecc2b1 (patch)
tree: abb3aa2cf7678fee04f5ac16949956145572c715
parent: e87700965abeddcdb84c9540107c69ce08b87431 (diff)
parent: e818d1d1a6eeaa75ad7a44082e546de897308de1 (diff)
Merge branch 'net-fib_rules-add-dscp-mask-support'
Ido Schimmel says:

====================
net: fib_rules: Add DSCP mask support

In some deployments users would like to encode path information into
certain bits of the IPv6 flow label, the UDP source port and the DSCP
field and use this information to route packets accordingly.

Redirecting traffic to a routing table based on specific bits in the
DSCP field is not currently possible. Only exact match is currently
supported by FIB rules.

This patchset extends FIB rules to match on the DSCP field with an
optional mask.

Patches #1-#5 gradually extend FIB rules to match on the DSCP field with
an optional mask.

Patch #6 adds test cases for the new functionality.

iproute2 support can be found here [1].

[1] https://github.com/idosch/iproute2/tree/submit/fib_rule_mask_v1
====================

Link: https://patch.msgid.link/20250220080525.831924-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- Documentation/netlink/specs/rt_rule.yaml | 5
-rw-r--r-- include/uapi/linux/fib_rules.h | 1
-rw-r--r-- net/core/fib_rules.c | 1
-rw-r--r-- net/ipv4/fib_rules.c | 47
-rw-r--r-- net/ipv6/fib6_rules.c | 45
-rwxr-xr-x tools/testing/selftests/net/fib_rule_tests.sh | 38
6 files changed, 132 insertions, 5 deletions
diff --git a/Documentation/netlink/specs/rt_rule.yaml b/Documentation/netlink/specs/rt_rule.yaml
index b30c924087fa..de0938d36541 100644
--- a/Documentation/netlink/specs/rt_rule.yaml
+++ b/Documentation/netlink/specs/rt_rule.yaml
@@ -190,6 +190,10 @@ attribute-sets:
name: dport-mask
type: u16
display-hint: hex
+ -
+ name: dscp-mask
+ type: u8
+ display-hint: hex
operations:
enum-model: directional
@@ -225,6 +229,7 @@ operations:
- flowlabel-mask
- sport-mask
- dport-mask
+ - dscp-mask
-
name: newrule-ntf
doc: Notify a rule creation
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 95ec01b15c65..2df6e4035d50 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -72,6 +72,7 @@ enum {
FRA_FLOWLABEL_MASK, /* flowlabel mask */
FRA_SPORT_MASK, /* sport mask */
FRA_DPORT_MASK, /* dport mask */
+ FRA_DSCP_MASK, /* dscp mask */
__FRA_MAX
};
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 5ddd34cbe7f6..4bc64d912a1c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -845,6 +845,7 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 },
[FRA_SPORT_MASK] = { .type = NLA_U16 },
[FRA_DPORT_MASK] = { .type = NLA_U16 },
+ [FRA_DSCP_MASK] = NLA_POLICY_MASK(NLA_U8, INET_DSCP_MASK >> 2),
};
int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 6b3d6a957822..fa58d6620ed6 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -37,6 +37,7 @@ struct fib4_rule {
u8 dst_len;
u8 src_len;
dscp_t dscp;
+ dscp_t dscp_mask;
u8 dscp_full:1; /* DSCP or TOS selector */
__be32 src;
__be32 srcmask;
@@ -192,7 +193,8 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
* to mask the upper three DSCP bits prior to matching to maintain
* legacy behavior.
*/
- if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
+ if (r->dscp_full &&
+ (r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask)
return 0;
else if (!r->dscp_full && r->dscp &&
!fib_dscp_masked_match(r->dscp, fl4))
@@ -235,11 +237,35 @@ static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
}
rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule4->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK);
rule4->dscp_full = true;
return 0;
}
+static int fib4_nl2rule_dscp_mask(const struct nlattr *nla,
+ struct fib4_rule *rule4,
+ struct netlink_ext_ack *extack)
+{
+ dscp_t dscp_mask;
+
+ if (!rule4->dscp_full) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Cannot specify DSCP mask without DSCP value");
+ return -EINVAL;
+ }
+
+ dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ if (rule4->dscp & ~dscp_mask) {
+ NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask");
+ return -EINVAL;
+ }
+
+ rule4->dscp_mask = dscp_mask;
+
+ return 0;
+}
+
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -271,6 +297,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
goto errout;
+ if (tb[FRA_DSCP_MASK] &&
+ fib4_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule4, extack) < 0)
+ goto errout;
+
/* split local/main if they are not already split */
err = fib_unmerge(net);
if (err)
@@ -366,6 +396,14 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}
+ if (tb[FRA_DSCP_MASK]) {
+ dscp_t dscp_mask;
+
+ dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2);
+ if (!rule4->dscp_full || rule4->dscp_mask != dscp_mask)
+ return 0;
+ }
+
#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
@@ -391,7 +429,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
if (rule4->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
- inet_dscp_to_dsfield(rule4->dscp) >> 2))
+ inet_dscp_to_dsfield(rule4->dscp) >> 2) ||
+ nla_put_u8(skb, FRA_DSCP_MASK,
+ inet_dscp_to_dsfield(rule4->dscp_mask) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
@@ -418,7 +458,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
return nla_total_size(4) /* dst */
+ nla_total_size(4) /* src */
+ nla_total_size(4) /* flow */
- + nla_total_size(1); /* dscp */
+ + nla_total_size(1) /* dscp */
+ + nla_total_size(1); /* dscp mask */
}
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 0144d01417d9..fd5f7112a51f 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -29,6 +29,7 @@ struct fib6_rule {
__be32 flowlabel;
__be32 flowlabel_mask;
dscp_t dscp;
+ dscp_t dscp_mask;
u8 dscp_full:1; /* DSCP or TOS selector */
};
@@ -331,7 +332,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 0;
}
- if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
+ if ((r->dscp ^ ip6_dscp(fl6->flowlabel)) & r->dscp_mask)
return 0;
if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask)
@@ -360,11 +361,35 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
}
rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule6->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK);
rule6->dscp_full = true;
return 0;
}
+static int fib6_nl2rule_dscp_mask(const struct nlattr *nla,
+ struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ dscp_t dscp_mask;
+
+ if (!rule6->dscp_full) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Cannot specify DSCP mask without DSCP value");
+ return -EINVAL;
+ }
+
+ dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ if (rule6->dscp & ~dscp_mask) {
+ NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask");
+ return -EINVAL;
+ }
+
+ rule6->dscp_mask = dscp_mask;
+
+ return 0;
+}
+
static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6,
struct netlink_ext_ack *extack)
{
@@ -409,10 +434,15 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
goto errout;
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+ rule6->dscp_mask = frh->tos ? inet_dsfield_to_dscp(INET_DSCP_MASK) : 0;
if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
goto errout;
+ if (tb[FRA_DSCP_MASK] &&
+ fib6_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule6, extack) < 0)
+ goto errout;
+
if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) &&
fib6_nl2rule_flowlabel(tb, rule6, extack) < 0)
goto errout;
@@ -482,6 +512,14 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}
+ if (tb[FRA_DSCP_MASK]) {
+ dscp_t dscp_mask;
+
+ dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2);
+ if (!rule6->dscp_full || rule6->dscp_mask != dscp_mask)
+ return 0;
+ }
+
if (tb[FRA_FLOWLABEL] &&
nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel)
return 0;
@@ -512,7 +550,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
if (rule6->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
- inet_dscp_to_dsfield(rule6->dscp) >> 2))
+ inet_dscp_to_dsfield(rule6->dscp) >> 2) ||
+ nla_put_u8(skb, FRA_DSCP_MASK,
+ inet_dscp_to_dsfield(rule6->dscp_mask) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
@@ -539,6 +579,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
return nla_total_size(16) /* dst */
+ nla_total_size(16) /* src */
+ nla_total_size(1) /* dscp */
+ + nla_total_size(1) /* dscp mask */
+ nla_total_size(4) /* flowlabel */
+ nla_total_size(4); /* flowlabel mask */
}
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 06c51d7ceb4a..b866bab1d92a 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -310,6 +310,25 @@ fib_rule6_test()
"iif dscp no redirect to table"
fi
+ ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x0f/0x0f"
+ tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+ tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+ getmatch="tos $tosmatch"
+ getnomatch="tos $tosnomatch"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp masked redirect to table" \
+ "dscp masked no redirect to table"
+
+ match="dscp 0x0f/0x0f"
+ getmatch="from $SRC_IP6 iif $DEV tos $tosmatch"
+ getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp masked redirect to table" \
+ "iif dscp masked no redirect to table"
+ fi
+
fib_check_iproute_support "flowlabel" "flowlabel"
if [ $? -eq 0 ]; then
match="flowlabel 0xfffff"
@@ -597,6 +616,25 @@ fib_rule4_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
+
+ ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x0f/0x0f"
+ tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+ tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+ getmatch="tos $tosmatch"
+ getnomatch="tos $tosnomatch"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp masked redirect to table" \
+ "dscp masked no redirect to table"
+
+ match="dscp 0x0f/0x0f"
+ getmatch="from $SRC_IP iif $DEV tos $tosmatch"
+ getnomatch="from $SRC_IP iif $DEV tos $tosnomatch"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp masked redirect to table" \
+ "iif dscp masked no redirect to table"
+ fi
}
fib_rule4_vrf_test()