Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add BPF_F_PERMANENT flag for sockmap skmsg redirect #599

Open
wants to merge 7 commits into
base: bpf-next_base
Choose a base branch
from
1 change: 1 addition & 0 deletions include/linux/skmsg.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ struct sk_psock {
u32 cork_bytes;
u32 eval;
bool redir_ingress; /* undefined if sk_redir is null */
bool redir_permanent;
struct sk_msg *cork;
struct sk_psock_progs progs;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
Expand Down
45 changes: 35 additions & 10 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3044,11 +3044,23 @@ union bpf_attr {
* socket level. If the message *msg* is allowed to pass (i.e. if
* the verdict eBPF program returns **SK_PASS**), redirect it to
* the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
* egress interfaces can be used for redirection. The
* **BPF_F_INGRESS** value in *flags* is used to make the
* distinction (ingress path is selected if the flag is present,
* egress path otherwise). This is the only flag supported for now.
* **BPF_MAP_TYPE_SOCKMAP**) at index *key*.
*
* The following *flags* are supported:
*
* **BPF_F_INGRESS**
* Both ingress and egress interfaces can be used for redirection.
* The **BPF_F_INGRESS** value in *flags* is used to make the
* distinction. Ingress path is selected if the flag is present,
* egress path otherwise.
* **BPF_F_PERMANENT**
* Indicates that redirect verdict and the target socket should be
* remembered. The verdict program will not be run for subsequent
* packets.
*
* **BPF_F_PERMANENT** cannot be used together with
* **bpf_msg_apply_bytes**\ () and **bpf_msg_cork_bytes**\ (). If
* **BPF_F_PERMANENT** is set apply_bytes and cork_bytes are ignored.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
Expand Down Expand Up @@ -3321,11 +3333,23 @@ union bpf_attr {
* socket level. If the message *msg* is allowed to pass (i.e. if
* the verdict eBPF program returns **SK_PASS**), redirect it to
* the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
* egress interfaces can be used for redirection. The
* **BPF_F_INGRESS** value in *flags* is used to make the
* distinction (ingress path is selected if the flag is present,
* egress path otherwise). This is the only flag supported for now.
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*.
*
* The following *flags* are supported:
*
* **BPF_F_INGRESS**
* Both ingress and egress interfaces can be used for redirection.
* The **BPF_F_INGRESS** value in *flags* is used to make the
* distinction. Ingress path is selected if the flag is present,
* egress path otherwise.
* **BPF_F_PERMANENT**
* Indicates that redirect verdict and the target socket should be
* remembered. The verdict program will not be run for subsequent
* packets.
*
* **BPF_F_PERMANENT** cannot be used together with
* **bpf_msg_apply_bytes**\ () and **bpf_msg_cork_bytes**\ (). If
* **BPF_F_PERMANENT** is set apply_bytes and cork_bytes are ignored.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
Expand Down Expand Up @@ -5928,6 +5952,7 @@ enum {
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
enum {
BPF_F_INGRESS = (1ULL << 0),
/* Accepted by bpf_msg_redirect_map() and bpf_msg_redirect_hash():
 * remember the redirect verdict and the target socket so that the
 * verdict program is not run for subsequent packets.
 */
BPF_F_PERMANENT = (1ULL << 1),
};

/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
Expand Down
6 changes: 5 additions & 1 deletion net/core/skmsg.c
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,11 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
msg->sk = sk;
ret = bpf_prog_run_pin_on_cpu(prog, msg);
ret = sk_psock_map_verd(ret, msg->sk_redir);
psock->apply_bytes = msg->apply_bytes;
psock->redir_permanent = msg->flags & BPF_F_PERMANENT;
if (psock->redir_permanent)
msg->cork_bytes = msg->apply_bytes = 0;
else
psock->apply_bytes = msg->apply_bytes;
if (ret == __SK_REDIRECT) {
if (psock->sk_redir) {
sock_put(psock->sk_redir);
Expand Down
4 changes: 2 additions & 2 deletions net/core/sock_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
{
struct sock *sk;

if (unlikely(flags & ~(BPF_F_INGRESS)))
if (unlikely(flags & ~(BPF_F_INGRESS | BPF_F_PERMANENT)))
return SK_DROP;

sk = __sock_map_lookup_elem(map, key);
Expand Down Expand Up @@ -1263,7 +1263,7 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
{
struct sock *sk;

if (unlikely(flags & ~(BPF_F_INGRESS)))
if (unlikely(flags & ~(BPF_F_INGRESS | BPF_F_PERMANENT)))
return SK_DROP;

sk = __sock_hash_lookup_elem(map, key);
Expand Down
12 changes: 7 additions & 5 deletions net/ipv4/tcp_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,10 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
if (!psock->apply_bytes) {
/* Clean up before releasing the sock lock. */
eval = psock->eval;
psock->eval = __SK_NONE;
psock->sk_redir = NULL;
if (!psock->redir_permanent) {
psock->eval = __SK_NONE;
psock->sk_redir = NULL;
}
}
if (psock->cork) {
cork = true;
Expand All @@ -448,7 +450,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
msg, tosend, flags);
sent = origsize - msg->sg.size;

if (eval == __SK_REDIRECT)
if (!psock->redir_permanent && eval == __SK_REDIRECT)
sock_put(sk_redir);

lock_sock(sk);
Expand All @@ -474,8 +476,8 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
}

if (likely(!ret)) {
if (!psock->apply_bytes) {
psock->eval = __SK_NONE;
if (!psock->apply_bytes && !psock->redir_permanent) {
psock->eval = __SK_NONE;
if (psock->sk_redir) {
sock_put(psock->sk_redir);
psock->sk_redir = NULL;
Expand Down
45 changes: 35 additions & 10 deletions tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3044,11 +3044,23 @@ union bpf_attr {
* socket level. If the message *msg* is allowed to pass (i.e. if
* the verdict eBPF program returns **SK_PASS**), redirect it to
* the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
* egress interfaces can be used for redirection. The
* **BPF_F_INGRESS** value in *flags* is used to make the
* distinction (ingress path is selected if the flag is present,
* egress path otherwise). This is the only flag supported for now.
* **BPF_MAP_TYPE_SOCKMAP**) at index *key*.
*
* The following *flags* are supported:
*
* **BPF_F_INGRESS**
* Both ingress and egress interfaces can be used for redirection.
* The **BPF_F_INGRESS** value in *flags* is used to make the
* distinction. Ingress path is selected if the flag is present,
* egress path otherwise.
* **BPF_F_PERMANENT**
* Indicates that redirect verdict and the target socket should be
* remembered. The verdict program will not be run for subsequent
* packets.
*
* **BPF_F_PERMANENT** cannot be used together with
* **bpf_msg_apply_bytes**\ () and **bpf_msg_cork_bytes**\ (). If
* **BPF_F_PERMANENT** is set apply_bytes and cork_bytes are ignored.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
Expand Down Expand Up @@ -3321,11 +3333,23 @@ union bpf_attr {
* socket level. If the message *msg* is allowed to pass (i.e. if
* the verdict eBPF program returns **SK_PASS**), redirect it to
* the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
* egress interfaces can be used for redirection. The
* **BPF_F_INGRESS** value in *flags* is used to make the
* distinction (ingress path is selected if the flag is present,
* egress path otherwise). This is the only flag supported for now.
* **BPF_MAP_TYPE_SOCKHASH**) using hash *key*.
*
* The following *flags* are supported:
*
* **BPF_F_INGRESS**
* Both ingress and egress interfaces can be used for redirection.
* The **BPF_F_INGRESS** value in *flags* is used to make the
* distinction. Ingress path is selected if the flag is present,
* egress path otherwise.
* **BPF_F_PERMANENT**
* Indicates that redirect verdict and the target socket should be
* remembered. The verdict program will not be run for subsequent
* packets.
*
* **BPF_F_PERMANENT** cannot be used together with
* **bpf_msg_apply_bytes**\ () and **bpf_msg_cork_bytes**\ (). If
* **BPF_F_PERMANENT** is set apply_bytes and cork_bytes are ignored.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
Expand Down Expand Up @@ -5928,6 +5952,7 @@ enum {
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
enum {
BPF_F_INGRESS = (1ULL << 0),
/* Accepted by bpf_msg_redirect_map() and bpf_msg_redirect_hash():
 * remember the redirect verdict and the target socket so that the
 * verdict program is not run for subsequent packets.
 */
BPF_F_PERMANENT = (1ULL << 1),
};

/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
Expand Down
122 changes: 122 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "test_sockmap_progs_query.skel.h"
#include "test_sockmap_pass_prog.skel.h"
#include "test_sockmap_drop_prog.skel.h"
#include "test_sockmap_msg_verdict.skel.h"
#include "bpf_iter_sockmap.skel.h"

#include "sockmap_helpers.h"
Expand Down Expand Up @@ -524,6 +525,107 @@ static void test_sockmap_skb_verdict_peek(void)
test_sockmap_pass_prog__destroy(pass);
}

/*
 * Exercise an sk_msg verdict program that redirects through
 * bpf_msg_redirect_map().
 *
 * @is_ingress:      set BPF_F_INGRESS in the redirect flags.
 * @is_permanent:    additionally set BPF_F_PERMANENT.
 * @is_self:         redirect to the sending socket (p1) instead of another
 *                   socket in the map.
 * @target_shutdown: close the redirect target after the first message and
 *                   check the outcome of the next send() on p1.
 *
 * Sockmap layout: key 0 = p1, key 1 = c1, key 2 = p0, key 3 = c0.
 * All payload is sent on p1 and is expected to show up on recv_fd.
 */
static void test_sockmap_msg_verdict(bool is_ingress, bool is_permanent, bool is_self,
				     bool target_shutdown)
{
	int key, sent, recvd, recv_fd, target_fd, send_errno;
	int err, map, verdict, s, c0, c1, p0, p1;
	struct test_sockmap_msg_verdict *skel;
	void (*old_sigpipe)(int);
	char buf[256] = "0123456789";
	/* fd already closed by the shutdown step; -1 means none */
	int closed_fd = -1;

	skel = test_sockmap_msg_verdict__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		return;
	verdict = bpf_program__fd(skel->progs.prog_skmsg_verdict);
	map = bpf_map__fd(skel->maps.sock_map);

	err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
	if (!ASSERT_OK(err, "bpf_prog_attach"))
		goto out;

	s = socket_loopback(AF_INET, SOCK_STREAM);
	if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
		goto out;
	err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
	if (!ASSERT_OK(err, "create_socket_pairs(s)"))
		goto out_close_srv;

	key = 0;
	err = bpf_map_update_elem(map, &key, &p1, BPF_NOEXIST);
	if (!ASSERT_OK(err, "bpf_map_update_elem(key0)"))
		goto out_close;
	key = 1;
	err = bpf_map_update_elem(map, &key, &c1, BPF_NOEXIST);
	if (!ASSERT_OK(err, "bpf_map_update_elem(key1)"))
		goto out_close;
	key = 2;
	err = bpf_map_update_elem(map, &key, &p0, BPF_NOEXIST);
	if (!ASSERT_OK(err, "bpf_map_update_elem(key2)"))
		goto out_close;
	key = 3;
	err = bpf_map_update_elem(map, &key, &c0, BPF_NOEXIST);
	if (!ASSERT_OK(err, "bpf_map_update_elem(key3)"))
		goto out_close;

	/*
	 * Pick the redirect target (map key + fd) and the fd on which the
	 * payload is expected to arrive.
	 */
	if (is_ingress) {
		skel->bss->skmsg_redir_flags = BPF_F_INGRESS;
		if (is_self) {
			skel->bss->skmsg_redir_key = 0;
			target_fd = p1;
			recv_fd = p1;
		} else {
			skel->bss->skmsg_redir_key = 1;
			target_fd = c1;
			recv_fd = c1;
		}
	} else {
		skel->bss->skmsg_redir_flags = 0;
		if (is_self) {
			skel->bss->skmsg_redir_key = 0;
			target_fd = p1;
			recv_fd = c1;
		} else {
			skel->bss->skmsg_redir_key = 2;
			target_fd = p0;
			recv_fd = c0;
		}
	}

	if (is_permanent)
		skel->bss->skmsg_redir_flags |= BPF_F_PERMANENT;

	sent = xsend(p1, &buf, sizeof(buf), 0);
	ASSERT_EQ(sent, sizeof(buf), "xsend(p1)");
	recvd = recv_timeout(recv_fd, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
	ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(recv_fd)");

	if (target_shutdown) {
		/*
		 * Ignore SIGPIPE only around the send and put the previous
		 * disposition back afterwards, so the change does not leak
		 * into whatever runs later in this process.
		 */
		old_sigpipe = signal(SIGPIPE, SIG_IGN);
		close(target_fd);
		/* target_fd aliases one of c0/p0/c1/p1; don't close it twice */
		closed_fd = target_fd;
		sent = send(p1, &buf, sizeof(buf), 0);
		/* capture errno before any other call can overwrite it */
		send_errno = errno;
		if (old_sigpipe != SIG_ERR)
			signal(SIGPIPE, old_sigpipe);

		if (is_permanent) {
			/* a permanent redirect to a closed target must fail */
			ASSERT_EQ(sent, -1, "send(p1) after shutdown");
			ASSERT_EQ(send_errno, EPIPE, "send(p1) errno");
		} else {
			ASSERT_EQ(sent, sizeof(buf), "send(p1) after shutdown");
		}
		goto out_close;
	}

	/* second round trip: the redirect must still work */
	sent = xsend(p1, &buf, sizeof(buf), 0);
	ASSERT_EQ(sent, sizeof(buf), "xsend(p1)");
	recvd = recv_timeout(recv_fd, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
	ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(recv_fd)");
out_close:
	if (c0 != closed_fd)
		close(c0);
	if (p0 != closed_fd)
		close(p0);
	if (c1 != closed_fd)
		close(c1);
	if (p1 != closed_fd)
		close(p1);
out_close_srv:
	/* the listening socket is not closed anywhere else in this function */
	close(s);
out:
	test_sockmap_msg_verdict__destroy(skel);
}

void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
Expand Down Expand Up @@ -566,4 +668,24 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_fionread(false);
if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
test_sockmap_skb_verdict_peek();
if (test__start_subtest("sockmap msg_verdict"))
test_sockmap_msg_verdict(false, false, false, false);
if (test__start_subtest("sockmap msg_verdict ingress"))
test_sockmap_msg_verdict(true, false, false, false);
if (test__start_subtest("sockmap msg_verdict permanent"))
test_sockmap_msg_verdict(false, true, false, false);
if (test__start_subtest("sockmap msg_verdict ingress permanent"))
test_sockmap_msg_verdict(true, true, false, false);
if (test__start_subtest("sockmap msg_verdict permanent self"))
test_sockmap_msg_verdict(false, true, true, false);
if (test__start_subtest("sockmap msg_verdict ingress permanent self"))
test_sockmap_msg_verdict(true, true, true, false);
if (test__start_subtest("sockmap msg_verdict permanent shutdown"))
test_sockmap_msg_verdict(false, true, false, true);
if (test__start_subtest("sockmap msg_verdict ingress permanent shutdown"))
test_sockmap_msg_verdict(true, true, false, true);
if (test__start_subtest("sockmap msg_verdict shutdown"))
test_sockmap_msg_verdict(false, false, false, true);
if (test__start_subtest("sockmap msg_verdict ingress shutdown"))
test_sockmap_msg_verdict(true, false, false, true);
}
3 changes: 2 additions & 1 deletion tools/testing/selftests/bpf/progs/test_sockmap_kern.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,9 @@ int bpf_prog6(struct sk_msg_md *msg)

f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) {
key = 2;
flags = *f;
if (flags & BPF_F_INGRESS)
key = 2;
}
#ifdef SOCKMAP
return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
Expand Down
25 changes: 25 additions & 0 deletions tools/testing/selftests/bpf/progs/test_sockmap_msg_verdict.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* Sockmap with room for four sockets, keyed by int index. It is the map
 * handed to bpf_msg_redirect_map() by the sk_msg program in this file.
 */
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 4);
__type(key, int);
__type(value, int);
} sock_map SEC(".maps");

/* Redirect parameters read by the sk_msg program on every invocation:
 * the flags and the sock_map key passed to bpf_msg_redirect_map().
 * NOTE(review): expected to be written by the user-space side of the
 * test before it sends data - confirm against the test runner.
 */
u64 skmsg_redir_flags = 0;
u32 skmsg_redir_key = 0;

/* sk_msg verdict program: ask for a redirect into sock_map using the key and
 * flags currently stored in the globals, then always return SK_PASS.
 *
 * The helper's return value is not propagated, so a failed redirect still
 * yields SK_PASS.
 * NOTE(review): presumably intentional, so that a send keeps working when
 * the target is no longer usable - confirm.
 */
SEC("sk_msg")
int prog_skmsg_verdict(struct sk_msg_md *msg)
{
	u32 redir_key = skmsg_redir_key;
	u64 redir_flags = skmsg_redir_flags;

	bpf_msg_redirect_map(msg, &sock_map, redir_key, redir_flags);

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";
Loading
Loading