bpf, cgroup: Add BPF support for cgroup1 hierarchy #631

Closed
wants to merge 11 commits
1 change: 1 addition & 0 deletions include/linux/cgroup-defs.h
@@ -558,6 +558,7 @@ struct cgroup_root {

/* A list running through the active hierarchies */
struct list_head root_list;
struct rcu_head rcu;

/* Hierarchy-specific flags */
unsigned int flags;
4 changes: 3 additions & 1 deletion include/linux/cgroup.h
@@ -69,6 +69,7 @@ struct css_task_iter {
extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
extern spinlock_t css_set_lock;

#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
@@ -386,7 +387,6 @@ static inline void cgroup_unlock(void)
* as locks used during the cgroup_subsys::attach() methods.
*/
#ifdef CONFIG_PROVE_RCU
extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
rcu_read_lock_sched_held() || \
@@ -853,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {}

#endif /* CONFIG_CGROUP_BPF */

struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id);

#endif /* _LINUX_CGROUP_H */
20 changes: 20 additions & 0 deletions kernel/bpf/helpers.c
@@ -2223,6 +2223,25 @@ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
rcu_read_unlock();
return ret;
}

/**
* bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a
* specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
* hierarchy ID.
* @task: The target task
* @hierarchy_id: The ID of a cgroup1 hierarchy
*
* On success, the cgroup is returned. On failure, NULL is returned.
*/
__bpf_kfunc struct cgroup *
bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id)
{
struct cgroup *cgrp = task_get_cgroup1(task, hierarchy_id);

if (IS_ERR(cgrp))
return NULL;
return cgrp;
}
#endif /* CONFIG_CGROUPS */

/**
@@ -2529,6 +2548,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_throw)
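For context, here is a minimal sketch of how a BPF program could call the new kfunc in a libbpf/CO-RE build. The section, program name, and the target_hid/cgrp_id globals are illustrative assumptions; bpf_task_get_cgroup1() and bpf_cgroup_release() are the kfuncs registered above.

/* Sketch only: declare the kfuncs and look up a task's cgroup in a
 * chosen cgroup1 hierarchy. Everything except the kfuncs is made up. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct cgroup *bpf_task_get_cgroup1(struct task_struct *task,
				    int hierarchy_id) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;

const volatile int target_hid;	/* cgroup1 hierarchy ID, set by userspace */
u64 cgrp_id;			/* result read back by userspace */

SEC("tp_btf/task_newtask")
int BPF_PROG(on_newtask, struct task_struct *task, u64 clone_flags)
{
	struct cgroup *cgrp;

	cgrp = bpf_task_get_cgroup1(task, target_hid);
	if (!cgrp)	/* hierarchy not mounted, or task not found in it */
		return 0;
	cgrp_id = cgrp->kn->id;	/* record the cgroup's kernfs ID */
	bpf_cgroup_release(cgrp);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

Because the kfunc is flagged KF_RET_NULL, the verifier insists on the NULL check before the returned pointer is dereferenced, and KF_ACQUIRE requires the matching bpf_cgroup_release().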
4 changes: 2 additions & 2 deletions kernel/cgroup/cgroup-internal.h
@@ -164,13 +164,13 @@ struct cgroup_mgctx {
#define DEFINE_CGROUP_MGCTX(name) \
struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)

extern spinlock_t css_set_lock;
extern struct cgroup_subsys *cgroup_subsys[];
extern struct list_head cgroup_roots;

/* iterate across the hierarchies */
#define for_each_root(root) \
list_for_each_entry((root), &cgroup_roots, root_list)
list_for_each_entry_rcu((root), &cgroup_roots, root_list, \
lockdep_is_held(&cgroup_mutex))

/**
* for_each_subsys - iterate all enabled cgroup subsystems
33 changes: 33 additions & 0 deletions kernel/cgroup/cgroup-v1.c
@@ -1262,6 +1262,39 @@ int cgroup1_get_tree(struct fs_context *fc)
return ret;
}

/**
* task_get_cgroup1 - Acquires the associated cgroup of a task within a
* specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
* hierarchy ID.
* @tsk: The target task
* @hierarchy_id: The ID of a cgroup1 hierarchy
*
* On success, the cgroup is returned. On failure, an ERR_PTR is returned.
* This helper is limited to cgroup1 hierarchies only.
*/
struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id)
{
struct cgroup *cgrp = ERR_PTR(-ENOENT);
struct cgroup_root *root;

rcu_read_lock();
for_each_root(root) {
/* cgroup1 only */
if (root == &cgrp_dfl_root)
continue;
if (root->hierarchy_id != hierarchy_id)
continue;
spin_lock_irq(&css_set_lock);
cgrp = task_cgroup_from_root(tsk, root);
if (!cgrp || !cgroup_tryget(cgrp))
cgrp = ERR_PTR(-ENOENT);
spin_unlock_irq(&css_set_lock);
break;
}
rcu_read_unlock();
return cgrp;
}

static int __init cgroup1_wq_init(void)
{
/*
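The hierarchy_id argument is the numeric ID a cgroup1 mount gets in /proc: the first field of each /proc/&lt;pid&gt;/cgroup line and the "hierarchy" column of /proc/cgroups. A userspace sketch for looking it up follows; the helper is illustrative and not part of the patch.

/* Sketch: find the cgroup1 hierarchy ID for a controller by parsing
 * /proc/self/cgroup, whose lines have the form
 * "hierarchy-ID:controller-list:path". */
#include <stdio.h>
#include <string.h>

static int cgroup1_hierarchy_id(const char *controller)
{
	char line[512], ctrls[256];
	int hid, id = -1;
	FILE *f = fopen("/proc/self/cgroup", "r");

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		/* cgroup2 entries look like "0::/path"; their empty
		 * controller list fails the match and they are skipped. */
		if (sscanf(line, "%d:%255[^:]:", &hid, ctrls) != 2)
			continue;
		/* substring match is good enough for a sketch */
		if (strstr(ctrls, controller)) {
			id = hid;
			break;
		}
	}
	fclose(f);
	return id;	/* -1 if the controller is not on a cgroup1 mount */
}

int main(void)
{
	printf("cpu hierarchy ID: %d\n", cgroup1_hierarchy_id("cpu"));
	return 0;
}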
45 changes: 30 additions & 15 deletions kernel/cgroup/cgroup.c
@@ -1315,7 +1315,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root)

void cgroup_free_root(struct cgroup_root *root)
{
kfree(root);
kfree_rcu(root, rcu);
}

static void cgroup_destroy_root(struct cgroup_root *root)
@@ -1347,10 +1347,9 @@ static void cgroup_destroy_root(struct cgroup_root *root)

spin_unlock_irq(&css_set_lock);

if (!list_empty(&root->root_list)) {
list_del(&root->root_list);
cgroup_root_count--;
}
WARN_ON_ONCE(list_empty(&root->root_list));
list_del_rcu(&root->root_list);
cgroup_root_count--;

if (!have_favordynmods)
cgroup_favor_dynmods(root, false);
@@ -1390,7 +1389,15 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
}
}

BUG_ON(!res_cgroup);
/*
* If cgroup_mutex is not held, the cgrp_cset_link will be freed
* before we remove the cgroup root from the root_list. Consequently,
* when accessing a cgroup root, the cset_link may have already been
* freed, resulting in a NULL res_cgroup. However, by holding the
* cgroup_mutex, we ensure that res_cgroup can't be NULL.
* If we don't hold cgroup_mutex in the caller, we must do the NULL
* check.
*/
return res_cgroup;
}
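The comment above establishes a calling convention rather than a behavioural change: a reader that holds only the RCU read lock must treat a NULL result as "this root is being unmounted". A minimal sketch of that pattern (the function name is made up; cgroup_id() is the existing helper):

/* Sketch: resolve a task's cgroup in a given root without cgroup_mutex.
 * Under rcu_read_lock() the root may be unmounted concurrently, so a
 * NULL cgroup simply means "skip this hierarchy". */
static u64 task_cgroup_id_in_root(struct task_struct *tsk,
				  struct cgroup_root *root)
{
	struct cgroup *cgrp;
	u64 id = 0;

	rcu_read_lock();
	spin_lock_irq(&css_set_lock);
	cgrp = task_cgroup_from_root(tsk, root);
	if (cgrp)
		id = cgroup_id(cgrp);
	spin_unlock_irq(&css_set_lock);
	rcu_read_unlock();
	return id;
}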

@@ -1413,6 +1420,11 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)

rcu_read_unlock();

/*
* The namespace_sem is held by current, so the root cgroup can't
* be umounted. Therefore, we can ensure that the res is non-NULL.
*/
WARN_ON_ONCE(!res);
return res;
}

@@ -1449,15 +1461,16 @@ static struct cgroup *current_cgns_cgroup_dfl(void)
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock);

return __cset_cgroup_from_root(cset, root);
}

/*
* Return the cgroup for "task" from the given hierarchy. Must be
* called with cgroup_mutex and css_set_lock held.
* called with css_set_lock held to prevent the task's cgroups from being modified.
* Must be called with either cgroup_mutex or rcu read lock to prevent the
* cgroup root from being destroyed.
*/
struct cgroup *task_cgroup_from_root(struct task_struct *task,
struct cgroup_root *root)
@@ -2020,7 +2033,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
struct cgroup_root *root = ctx->root;
struct cgroup *cgrp = &root->cgrp;

INIT_LIST_HEAD(&root->root_list);
INIT_LIST_HEAD_RCU(&root->root_list);
atomic_set(&root->nr_cgrps, 1);
cgrp->root = root;
init_cgroup_housekeeping(cgrp);
@@ -2103,7 +2116,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
* care of subsystems' refcounts, which are explicitly dropped in
* the failure exit path.
*/
list_add(&root->root_list, &cgroup_roots);
list_add_rcu(&root->root_list, &cgroup_roots);
cgroup_root_count++;

/*
@@ -6265,7 +6278,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
if (!buf)
goto out;

cgroup_lock();
rcu_read_lock();
spin_lock_irq(&css_set_lock);

for_each_root(root) {
@@ -6276,6 +6289,11 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
if (root == &cgrp_dfl_root && !READ_ONCE(cgrp_dfl_visible))
continue;

cgrp = task_cgroup_from_root(tsk, root);
/* The root has already been unmounted. */
if (!cgrp)
continue;

seq_printf(m, "%d:", root->hierarchy_id);
if (root != &cgrp_dfl_root)
for_each_subsys(ss, ssid)
@@ -6286,9 +6304,6 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m, "%sname=%s", count ? "," : "",
root->name);
seq_putc(m, ':');

cgrp = task_cgroup_from_root(tsk, root);

/*
* On traditional hierarchies, all zombie tasks show up as
* belonging to the root cgroup. On the default hierarchy,
@@ -6320,7 +6335,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
retval = 0;
out_unlock:
spin_unlock_irq(&css_set_lock);
cgroup_unlock();
rcu_read_unlock();
kfree(buf);
out:
return retval;