1
0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2026-01-15 11:03:02 +00:00
Christian Brauner 76b6f5dfb3
nstree: add listns()
Add a new listns() system call that allows userspace to iterate through
namespaces in the system. This provides a programmatic interface to
discover and inspect namespaces, enhancing existing namespace APIs.

Currently, there is no direct way for userspace to enumerate namespaces
in the system. Applications must resort to scanning /proc/<pid>/ns/
across all processes, which is:

1. Inefficient - requires iterating over all processes
2. Incomplete - misses namespaces that aren't attached to any
   running process but are kept alive by file descriptors, bind mounts,
   or parent namespace references
3. Permission-heavy - requires access to /proc for many processes
4. Unordered - provides no ordering or ownership information
5. Unfiltered - cannot filter by namespace type; callers must always
   iterate over and check all namespaces

The list goes on. The listns() system call solves these problems by
providing direct kernel-level enumeration of namespaces. It is similar
to listmount() but obviously tailored to namespaces.

/*
 * @req: Pointer to struct ns_id_req specifying search parameters
 * @ns_ids: User buffer to receive namespace IDs
 * @nr_ns_ids: Size of ns_ids buffer (maximum number of IDs to return)
 * @flags: Reserved for future use (must be 0)
 */
ssize_t listns(const struct ns_id_req *req, u64 *ns_ids,
               size_t nr_ns_ids, unsigned int flags);

Returns:
- On success: Number of namespace IDs written to ns_ids
- On error: Negative error code

/*
 * @size: Structure size
 * @ns_id: Starting point for iteration; use 0 for first call, then
 *         use the last returned ID for subsequent calls to paginate
 * @ns_type: Bitmask of namespace types to include (from enum ns_type):
 *           0: Return all namespace types
 *           MNT_NS: Mount namespaces
 *           NET_NS: Network namespaces
 *           USER_NS: User namespaces
 *           etc. Can be OR'd together
 * @user_ns_id: Filter results to namespaces owned by this user namespace:
 *              0: Return all namespaces (subject to permission checks)
 *              LISTNS_CURRENT_USER: Namespaces owned by caller's user namespace
 *              Other value: Namespaces owned by the specified user namespace ID
 */
struct ns_id_req {
        __u32 size;         /* sizeof(struct ns_id_req) */
        __u32 spare;        /* Reserved, must be 0 */
        __u64 ns_id;        /* Last seen namespace ID (for pagination) */
        __u32 ns_type;      /* Filter by namespace type(s) */
        __u32 spare2;       /* Reserved, must be 0 */
        __u64 user_ns_id;   /* Filter by owning user namespace */
};

Example 1: List all namespaces

/*
 * Print the ID of every namespace listns() reports. IDs are fetched in
 * fixed-size batches; each follow-up call resumes after the last ID that
 * the previous batch returned.
 */
void list_all_namespaces(void)
{
    enum { BATCH = 100 };
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,          /* 0 = begin at the first namespace */
        .ns_type = 0,        /* no type filter */
        .user_ns_id = 0,     /* no owner filter */
    };
    uint64_t ids[BATCH];
    ssize_t count;

    printf("All namespaces in the system:\n");
    for (;;) {
        count = listns(&req, ids, BATCH, 0);
        if (count < 0) {
            perror("listns");
            break;
        }

        for (ssize_t idx = 0; idx < count; idx++)
            printf("  Namespace ID: %llu\n", (unsigned long long)ids[idx]);

        /* Anything other than a completely full batch ends the listing. */
        if (count != BATCH)
            break;

        /* Buffer was full, more may exist: resume after the last ID seen. */
        req.ns_id = ids[count - 1];
    }
}

Example 2: List network namespaces only

/*
 * Fetch a single batch of network namespace IDs with listns() and print
 * the batch size followed by each ID. On failure the error is reported
 * via perror() and nothing else is printed.
 */
void list_network_namespaces(void)
{
    uint64_t ids[100];
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,
        .ns_type = NET_NS,   /* restrict the listing to network namespaces */
        .user_ns_id = 0,
    };
    ssize_t found = listns(&req, ids, 100, 0);

    if (found < 0) {
        perror("listns");
        return;
    }

    printf("Network namespaces: %zd found\n", found);
    for (const uint64_t *id = ids; id < ids + found; id++)
        printf("  netns ID: %llu\n", (unsigned long long)*id);
}

Example 3: List namespaces owned by current user namespace

/*
 * Fetch a single batch of namespace IDs of any type that are owned by the
 * caller's user namespace (LISTNS_CURRENT_USER) and print them. On failure
 * the error is reported via perror() and nothing else is printed.
 */
void list_owned_namespaces(void)
{
    uint64_t ids[100];
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,
        .ns_type = 0,                      /* no type filter */
        .user_ns_id = LISTNS_CURRENT_USER, /* owner = caller's userns */
    };
    ssize_t owned = listns(&req, ids, 100, 0);

    if (owned < 0) {
        perror("listns");
        return;
    }

    printf("Namespaces owned by my user namespace: %zd\n", owned);
    for (ssize_t idx = 0; idx < owned; ++idx)
        printf("  ns ID: %llu\n", (unsigned long long)ids[idx]);
}

Example 4: List multiple namespace types

/*
 * Fetch a single batch of network and mount namespace IDs (the two type
 * bits OR'd together) and print how many were returned.
 *
 * listns() returns a negative value on error; that case is reported via
 * perror() instead of being printed as a namespace count.
 */
void list_network_and_mount_namespaces(void)
{
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,
        .ns_type = NET_NS | MNT_NS,  /* Network and mount */
        .user_ns_id = 0,
    };
    uint64_t ids[100];
    ssize_t ret;

    ret = listns(&req, ids, 100, 0);
    if (ret < 0) {
        perror("listns");
        return;
    }

    printf("Network and mount namespaces: %zd found\n", ret);
}

Example 5: Pagination through large namespace sets

/*
 * Walk every namespace listns() reports in batches of 50, printing the
 * size of each non-empty batch and finally the running total. Iteration
 * stops on error, on an empty batch, or on a partial batch.
 */
void list_all_with_pagination(void)
{
    enum { BATCH = 50 };
    struct ns_id_req req = {
        .size = sizeof(req),
        .ns_id = 0,
        .ns_type = 0,
        .user_ns_id = 0,
    };
    uint64_t ids[BATCH];
    size_t total = 0;
    ssize_t got;

    printf("Enumerating all namespaces with pagination:\n");

    do {
        got = listns(&req, ids, BATCH, 0);
        if (got < 0) {
            perror("listns");
            break;
        }

        /* An empty batch means there is nothing left to account for. */
        if (got > 0) {
            total += got;
            printf("  Batch: %zd namespaces\n", got);

            /* Last ID in this batch becomes start of next batch */
            req.ns_id = ids[got - 1];
        }
    } while (got >= BATCH);  /* anything short of a full batch ends the walk */

    printf("Total: %zu namespaces\n", total);
}

Permission Model

listns() respects namespace isolation and capabilities:

(1) Global listing (user_ns_id = 0):
    - Requires CAP_SYS_ADMIN in the namespace's owning user namespace
    - OR the namespace must be in the caller's namespace context (e.g.,
      a namespace the caller is currently using)
    - User namespaces additionally allow listing if the caller has
      CAP_SYS_ADMIN in that user namespace itself
(2) Owner-filtered listing (user_ns_id != 0):
    - Requires CAP_SYS_ADMIN in the specified owner user namespace
    - OR the namespace must be in the caller's namespace context
    - This allows unprivileged processes to enumerate namespaces they own
(3) Visibility:
    - Only "active" namespaces are listed
    - A namespace is active if it has a non-zero __ns_ref_active count
    - This includes namespaces used by running processes, held by open
      file descriptors, or kept active by bind mounts
    - Inactive namespaces (kept alive only by internal kernel
      references) are not visible via listns()

Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-19-2e6f823ebdc0@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-03 17:41:18 +01:00

129 lines
3.8 KiB
C

/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __LINUX_NSFS_H
#define __LINUX_NSFS_H
#include <linux/ioctl.h>
#include <linux/types.h>
/* ioctl type byte passed as the first argument of every _IO()/_IOR() request defined below. */
#define NSIO 0xb7
/* Returns a file descriptor that refers to an owning user namespace */
#define NS_GET_USERNS _IO(NSIO, 0x1)
/* Returns a file descriptor that refers to a parent namespace */
#define NS_GET_PARENT _IO(NSIO, 0x2)
/*
 * Returns the type of namespace (CLONE_NEW* value) referred to by
 * file descriptor
 */
#define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
/* Translate pid from target pid namespace into the caller's pid namespace. */
#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
/* Return thread-group leader id of pid in the caller's pid namespace. */
#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
/* Translate pid from caller's pid namespace into a target pid namespace. */
#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
/* Return thread-group leader id of pid in the target pid namespace. */
#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
/*
 * Mount namespace information record; it is the argument type encoded in
 * the NS_MNT_GET_INFO, NS_MNT_GET_NEXT and NS_MNT_GET_PREV requests.
 * The layout is ABI: two 32-bit members followed by one 64-bit member.
 */
struct mnt_ns_info {
	__u32 size;		/* structure size -- presumably as understood by the caller; confirm */
	__u32 nr_mounts;	/* number of mounts -- presumably in this namespace; confirm */
	__u64 mnt_ns_id;	/* mount namespace ID */
};

/* Size in bytes of the first published version of struct mnt_ns_info. */
#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
/*
 * The three NS_MNT_* requests below all encode struct mnt_ns_info as
 * their argument type.
 */
/* Get information about namespace. */
#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info)
/* Get next namespace. */
#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info)
/* Get previous namespace. */
#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
/*
 * Retrieve namespace identifiers. Both requests encode a __u64 argument.
 * NS_GET_MNTNS_ID uses request number 5, the slot that the 0x1-0x9
 * sequence of NS_GET_* requests earlier in this header leaves unassigned.
 */
#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64)
#define NS_GET_ID _IOR(NSIO, 13, __u64)
/*
 * Fixed values named after the initial namespace of each type -- presumably
 * the inode numbers those namespaces use; confirm against the users.
 * The values count down by one from 0xefffffff.
 */
enum init_ns_ino {
	IPC_NS_INIT_INO		= 0xefffffffU,
	UTS_NS_INIT_INO		= 0xeffffffeU,
	USER_NS_INIT_INO	= 0xeffffffdU,
	PID_NS_INIT_INO		= 0xeffffffcU,
	CGROUP_NS_INIT_INO	= 0xeffffffbU,
	TIME_NS_INIT_INO	= 0xeffffffaU,
	NET_NS_INIT_INO		= 0xeffffff9U,
	MNT_NS_INIT_INO		= 0xeffffff8U,
#ifdef __KERNEL__
	/* Only visible to kernel builds, not to userspace. */
	MNT_NS_ANON_INO		= 0xeffffff7U,
#endif
};
/*
 * nsfs file handle payload. The layout is ABI: one 64-bit member followed
 * by two 32-bit members, 16 bytes in total.
 */
struct nsfs_file_handle {
	__u64 ns_id;	/* namespace ID */
	__u32 ns_type;	/* namespace type -- presumably an enum ns_type / CLONE_NEW* value; confirm */
	__u32 ns_inum;	/* presumably the namespace's inode number; confirm */
};

/* Size of the first published layout, and of whatever layout this header declares. */
#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
/*
 * Fixed IDs named after the initial namespace of each type, numbered
 * consecutively from 1 in the same type order as enum init_ns_ino.
 */
enum init_ns_id {
	IPC_NS_INIT_ID		= 1ULL,
	UTS_NS_INIT_ID		= 2ULL,
	USER_NS_INIT_ID		= 3ULL,
	PID_NS_INIT_ID		= 4ULL,
	CGROUP_NS_INIT_ID	= 5ULL,
	TIME_NS_INIT_ID		= 6ULL,
	NET_NS_INIT_ID		= 7ULL,
	MNT_NS_INIT_ID		= 8ULL,
#ifdef __KERNEL__
	/* Kernel-only alias for the highest ID assigned above. */
	NS_LAST_INIT_ID		= MNT_NS_INIT_ID,
#endif
};
/*
 * Namespace type bits. Each enumerator is a single bit, so several types
 * can be OR'd into one mask; the trailing comment names the CLONE_NEW*
 * flag each one corresponds to.
 */
enum ns_type {
	TIME_NS		= 0x00000080,	/* bit 7:  CLONE_NEWTIME */
	MNT_NS		= 0x00020000,	/* bit 17: CLONE_NEWNS */
	CGROUP_NS	= 0x02000000,	/* bit 25: CLONE_NEWCGROUP */
	UTS_NS		= 0x04000000,	/* bit 26: CLONE_NEWUTS */
	IPC_NS		= 0x08000000,	/* bit 27: CLONE_NEWIPC */
	USER_NS		= 0x10000000,	/* bit 28: CLONE_NEWUSER */
	PID_NS		= 0x20000000,	/* bit 29: CLONE_NEWPID */
	NET_NS		= 0x40000000,	/* bit 30: CLONE_NEWNET */
};
/**
 * struct ns_id_req - namespace ID request structure
 * @size: size of this structure
 * @spare: reserved for future use, must be 0
 * @ns_id: last namespace id
 * @ns_type: bitmask of enum ns_type values to list, or 0 for all types
 * @spare2: reserved for future use, must be 0
 * @user_ns_id: owning user namespace ID
 *
 * Structure for passing namespace ID and miscellaneous parameters to
 * statns(2) and listns(2).
 *
 * For listns(2) @ns_id represents the last listed namespace id (or zero
 * to start from the beginning). @ns_type, @spare2 and @user_ns_id are the
 * listns(2)-specific members and are grouped in an anonymous struct.
 * A @user_ns_id of 0 applies no owner filter; LISTNS_CURRENT_USER selects
 * the caller's user namespace.
 */
struct ns_id_req {
	__u32 size;
	__u32 spare;
	__u64 ns_id;
	struct /* listns */ {
		__u32 ns_type;
		__u32 spare2;
		__u64 user_ns_id;
	};
};
/*
 * Special @user_ns_id value that can be passed to listns(): the all-ones
 * 64-bit value stands for the caller's own user namespace instead of
 * naming a concrete user namespace ID.
 */
#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
/*
 * List of all ns_id_req versions. The VER0 value matches the sum of the
 * member sizes of struct ns_id_req: 4 + 4 + 8 + 4 + 4 + 8 bytes.
 */
#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
#endif /* __LINUX_NSFS_H */