mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-11 17:10:13 +00:00
Add a new listns() system call that allows userspace to iterate through
namespaces in the system. This provides a programmatic interface to
discover and inspect namespaces, enhancing existing namespace apis.
Currently, there is no direct way for userspace to enumerate namespaces
in the system. Applications must resort to scanning /proc/<pid>/ns/
across all processes, which is:
1. Inefficient - requires iterating over all processes
2. Incomplete - misses inactive namespaces that aren't attached to any
running process but are kept alive by file descriptors, bind mounts,
or parent namespace references
3. Permission-heavy - requires access to /proc for many processes
4. No ordering or ownership.
5. No filtering per namespace type: Must always iterate and check all
namespaces.
The list goes on. The listns() system call solves these problems by
providing direct kernel-level enumeration of namespaces. It is similar
to listmount() but obviously tailored to namespaces.
/*
* @req: Pointer to struct ns_id_req specifying search parameters
* @ns_ids: User buffer to receive namespace IDs
* @nr_ns_ids: Size of ns_ids buffer (maximum number of IDs to return)
* @flags: Reserved for future use (must be 0)
*/
ssize_t listns(const struct ns_id_req *req, u64 *ns_ids,
size_t nr_ns_ids, unsigned int flags);
Returns:
- On success: Number of namespace IDs written to ns_ids
- On error: Negative error code
/*
* @size: Structure size
* @ns_id: Starting point for iteration; use 0 for first call, then
* use the last returned ID for subsequent calls to paginate
* @ns_type: Bitmask of namespace types to include (from enum ns_type):
* 0: Return all namespace types
* MNT_NS: Mount namespaces
* NET_NS: Network namespaces
* USER_NS: User namespaces
* etc. Can be OR'd together
* @user_ns_id: Filter results to namespaces owned by this user namespace:
* 0: Return all namespaces (subject to permission checks)
* LISTNS_CURRENT_USER: Namespaces owned by caller's user namespace
* Other value: Namespaces owned by the specified user namespace ID
*/
struct ns_id_req {
__u32 size; /* sizeof(struct ns_id_req) */
__u32 spare; /* Reserved, must be 0 */
__u64 ns_id; /* Last seen namespace ID (for pagination) */
__u32 ns_type; /* Filter by namespace type(s) */
__u32 spare2; /* Reserved, must be 0 */
__u64 user_ns_id; /* Filter by owning user namespace */
};
Example 1: List all namespaces
void list_all_namespaces(void)
{
struct ns_id_req req = {
.size = sizeof(req),
.ns_id = 0, /* Start from beginning */
.ns_type = 0, /* All types */
.user_ns_id = 0, /* All user namespaces */
};
uint64_t ids[100];
ssize_t ret;
printf("All namespaces in the system:\n");
do {
ret = listns(&req, ids, 100, 0);
if (ret < 0) {
perror("listns");
break;
}
for (ssize_t i = 0; i < ret; i++)
printf(" Namespace ID: %llu\n", (unsigned long long)ids[i]);
/* Continue from last seen ID */
if (ret > 0)
req.ns_id = ids[ret - 1];
} while (ret == 100); /* Buffer was full, more may exist */
}
Example 2: List network namespaces only
void list_network_namespaces(void)
{
struct ns_id_req req = {
.size = sizeof(req),
.ns_id = 0,
.ns_type = NET_NS, /* Only network namespaces */
.user_ns_id = 0,
};
uint64_t ids[100];
ssize_t ret;
ret = listns(&req, ids, 100, 0);
if (ret < 0) {
perror("listns");
return;
}
printf("Network namespaces: %zd found\n", ret);
for (ssize_t i = 0; i < ret; i++)
printf(" netns ID: %llu\n", (unsigned long long)ids[i]);
}
Example 3: List namespaces owned by current user namespace
void list_owned_namespaces(void)
{
struct ns_id_req req = {
.size = sizeof(req),
.ns_id = 0,
.ns_type = 0, /* All types */
.user_ns_id = LISTNS_CURRENT_USER, /* Current userns */
};
uint64_t ids[100];
ssize_t ret;
ret = listns(&req, ids, 100, 0);
if (ret < 0) {
perror("listns");
return;
}
printf("Namespaces owned by my user namespace: %zd\n", ret);
for (ssize_t i = 0; i < ret; i++)
printf(" ns ID: %llu\n", (unsigned long long)ids[i]);
}
Example 4: List multiple namespace types
void list_network_and_mount_namespaces(void)
{
struct ns_id_req req = {
.size = sizeof(req),
.ns_id = 0,
.ns_type = NET_NS | MNT_NS, /* Network and mount */
.user_ns_id = 0,
};
uint64_t ids[100];
ssize_t ret;
ret = listns(&req, ids, 100, 0);
printf("Network and mount namespaces: %zd found\n", ret);
}
Example 5: Pagination through large namespace sets
void list_all_with_pagination(void)
{
struct ns_id_req req = {
.size = sizeof(req),
.ns_id = 0,
.ns_type = 0,
.user_ns_id = 0,
};
uint64_t ids[50];
size_t total = 0;
ssize_t ret;
printf("Enumerating all namespaces with pagination:\n");
while (1) {
ret = listns(&req, ids, 50, 0);
if (ret < 0) {
perror("listns");
break;
}
if (ret == 0)
break; /* No more namespaces */
total += ret;
printf(" Batch: %zd namespaces\n", ret);
/* Last ID in this batch becomes start of next batch */
req.ns_id = ids[ret - 1];
if (ret < 50)
break; /* Partial batch = end of results */
}
printf("Total: %zu namespaces\n", total);
}
Permission Model
listns() respects namespace isolation and capabilities:
(1) Global listing (user_ns_id = 0):
- Requires CAP_SYS_ADMIN in the namespace's owning user namespace
- OR the namespace must be in the caller's namespace context (e.g.,
a namespace the caller is currently using)
- User namespaces additionally allow listing if the caller has
CAP_SYS_ADMIN in that user namespace itself
(2) Owner-filtered listing (user_ns_id != 0):
- Requires CAP_SYS_ADMIN in the specified owner user namespace
- OR the namespace must be in the caller's namespace context
- This allows unprivileged processes to enumerate namespaces they own
(3) Visibility:
- Only "active" namespaces are listed
- A namespace is active if it has a non-zero __ns_ref_active count
- This includes namespaces used by running processes, held by open
file descriptors, or kept active by bind mounts
- Inactive namespaces (kept alive only by internal kernel
references) are not visible via listns()
Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-19-2e6f823ebdc0@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
254 lines
6.6 KiB
C
254 lines
6.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_USER_NAMESPACE_H
|
|
#define _LINUX_USER_NAMESPACE_H
|
|
|
|
#include <linux/kref.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/ns_common.h>
|
|
#include <linux/rculist_nulls.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/rcuref.h>
|
|
#include <linux/rwsem.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/err.h>
|
|
|
|
#define UID_GID_MAP_MAX_BASE_EXTENTS 5
|
|
#define UID_GID_MAP_MAX_EXTENTS 340
|
|
|
|
struct uid_gid_extent {
|
|
u32 first;
|
|
u32 lower_first;
|
|
u32 count;
|
|
};
|
|
|
|
struct uid_gid_map { /* 64 bytes -- 1 cache line */
|
|
union {
|
|
struct {
|
|
struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS];
|
|
u32 nr_extents;
|
|
};
|
|
struct {
|
|
struct uid_gid_extent *forward;
|
|
struct uid_gid_extent *reverse;
|
|
};
|
|
};
|
|
};
|
|
|
|
#define USERNS_SETGROUPS_ALLOWED 1UL
|
|
|
|
#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
|
|
|
|
struct ucounts;
|
|
|
|
enum ucount_type {
|
|
UCOUNT_USER_NAMESPACES,
|
|
UCOUNT_PID_NAMESPACES,
|
|
UCOUNT_UTS_NAMESPACES,
|
|
UCOUNT_IPC_NAMESPACES,
|
|
UCOUNT_NET_NAMESPACES,
|
|
UCOUNT_MNT_NAMESPACES,
|
|
UCOUNT_CGROUP_NAMESPACES,
|
|
UCOUNT_TIME_NAMESPACES,
|
|
#ifdef CONFIG_INOTIFY_USER
|
|
UCOUNT_INOTIFY_INSTANCES,
|
|
UCOUNT_INOTIFY_WATCHES,
|
|
#endif
|
|
#ifdef CONFIG_FANOTIFY
|
|
UCOUNT_FANOTIFY_GROUPS,
|
|
UCOUNT_FANOTIFY_MARKS,
|
|
#endif
|
|
UCOUNT_COUNTS,
|
|
};
|
|
|
|
enum rlimit_type {
|
|
UCOUNT_RLIMIT_NPROC,
|
|
UCOUNT_RLIMIT_MSGQUEUE,
|
|
UCOUNT_RLIMIT_SIGPENDING,
|
|
UCOUNT_RLIMIT_MEMLOCK,
|
|
UCOUNT_RLIMIT_COUNTS,
|
|
};
|
|
|
|
#if IS_ENABLED(CONFIG_BINFMT_MISC)
|
|
struct binfmt_misc;
|
|
#endif
|
|
|
|
struct user_namespace {
|
|
struct uid_gid_map uid_map;
|
|
struct uid_gid_map gid_map;
|
|
struct uid_gid_map projid_map;
|
|
struct user_namespace *parent;
|
|
int level;
|
|
kuid_t owner;
|
|
kgid_t group;
|
|
struct ns_common ns;
|
|
unsigned long flags;
|
|
/* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP
|
|
* in its effective capability set at the child ns creation time. */
|
|
bool parent_could_setfcap;
|
|
|
|
#ifdef CONFIG_KEYS
|
|
/* List of joinable keyrings in this namespace. Modification access of
|
|
* these pointers is controlled by keyring_sem. Once
|
|
* user_keyring_register is set, it won't be changed, so it can be
|
|
* accessed directly with READ_ONCE().
|
|
*/
|
|
struct list_head keyring_name_list;
|
|
struct key *user_keyring_register;
|
|
struct rw_semaphore keyring_sem;
|
|
#endif
|
|
|
|
/* Register of per-UID persistent keyrings for this namespace */
|
|
#ifdef CONFIG_PERSISTENT_KEYRINGS
|
|
struct key *persistent_keyring_register;
|
|
#endif
|
|
struct work_struct work;
|
|
#ifdef CONFIG_SYSCTL
|
|
struct ctl_table_set set;
|
|
struct ctl_table_header *sysctls;
|
|
#endif
|
|
struct ucounts *ucounts;
|
|
long ucount_max[UCOUNT_COUNTS];
|
|
long rlimit_max[UCOUNT_RLIMIT_COUNTS];
|
|
|
|
#if IS_ENABLED(CONFIG_BINFMT_MISC)
|
|
struct binfmt_misc *binfmt_misc;
|
|
#endif
|
|
} __randomize_layout;
|
|
|
|
struct ucounts {
|
|
struct hlist_nulls_node node;
|
|
struct user_namespace *ns;
|
|
kuid_t uid;
|
|
struct rcu_head rcu;
|
|
rcuref_t count;
|
|
atomic_long_t ucount[UCOUNT_COUNTS];
|
|
atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS];
|
|
};
|
|
|
|
extern struct user_namespace init_user_ns;
|
|
extern struct ucounts init_ucounts;
|
|
|
|
bool setup_userns_sysctls(struct user_namespace *ns);
|
|
void retire_userns_sysctls(struct user_namespace *ns);
|
|
struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
|
|
void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
|
|
struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
|
|
void put_ucounts(struct ucounts *ucounts);
|
|
|
|
static inline struct ucounts * __must_check get_ucounts(struct ucounts *ucounts)
|
|
{
|
|
if (rcuref_get(&ucounts->count))
|
|
return ucounts;
|
|
return NULL;
|
|
}
|
|
|
|
static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type)
|
|
{
|
|
return atomic_long_read(&ucounts->rlimit[type]);
|
|
}
|
|
|
|
long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
|
|
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
|
|
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type,
|
|
bool override_rlimit);
|
|
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type);
|
|
bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max);
|
|
|
|
static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type)
|
|
{
|
|
return READ_ONCE(ns->rlimit_max[type]);
|
|
}
|
|
|
|
static inline void set_userns_rlimit_max(struct user_namespace *ns,
|
|
enum rlimit_type type, unsigned long max)
|
|
{
|
|
ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX;
|
|
}
|
|
|
|
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
|
|
{
|
|
return container_of(ns, struct user_namespace, ns);
|
|
}
|
|
|
|
#ifdef CONFIG_USER_NS
|
|
|
|
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
|
{
|
|
if (ns)
|
|
ns_ref_inc(ns);
|
|
return ns;
|
|
}
|
|
|
|
extern int create_user_ns(struct cred *new);
|
|
extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
|
|
extern void __put_user_ns(struct user_namespace *ns);
|
|
|
|
static inline void put_user_ns(struct user_namespace *ns)
|
|
{
|
|
if (ns && ns_ref_put(ns))
|
|
__put_user_ns(ns);
|
|
}
|
|
|
|
struct seq_operations;
|
|
extern const struct seq_operations proc_uid_seq_operations;
|
|
extern const struct seq_operations proc_gid_seq_operations;
|
|
extern const struct seq_operations proc_projid_seq_operations;
|
|
extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
|
|
extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
|
|
extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
|
|
extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
|
|
extern int proc_setgroups_show(struct seq_file *m, void *v);
|
|
extern bool userns_may_setgroups(const struct user_namespace *ns);
|
|
extern bool in_userns(const struct user_namespace *ancestor,
|
|
const struct user_namespace *child);
|
|
extern bool current_in_userns(const struct user_namespace *target_ns);
|
|
struct ns_common *ns_get_owner(struct ns_common *ns);
|
|
#else
|
|
|
|
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
|
{
|
|
return &init_user_ns;
|
|
}
|
|
|
|
static inline int create_user_ns(struct cred *new)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
static inline int unshare_userns(unsigned long unshare_flags,
|
|
struct cred **new_cred)
|
|
{
|
|
if (unshare_flags & CLONE_NEWUSER)
|
|
return -EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
static inline void put_user_ns(struct user_namespace *ns)
|
|
{
|
|
}
|
|
|
|
static inline bool userns_may_setgroups(const struct user_namespace *ns)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
static inline bool in_userns(const struct user_namespace *ancestor,
|
|
const struct user_namespace *child)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
static inline bool current_in_userns(const struct user_namespace *target_ns)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
static inline struct ns_common *ns_get_owner(struct ns_common *ns)
|
|
{
|
|
return ERR_PTR(-EPERM);
|
|
}
|
|
#endif
|
|
|
|
#endif /* _LINUX_USER_H */
|