selinux初探

Posted on 2024-05-03

PREFACE：啥也不懂，爬来学学

# linux 控制访问模型

sched.h - include/linux/sched.h - Linux source code (v5.10-rc4) - Bootlin

运行一个程序本质上是当前用户有没有权限访问并运行该文件、是否有创建进程的权限，以及新进程如何继承当前进程的属性

	/*
	 * Per-task descriptor (abridged excerpt; members left out of this
	 * listing are marked with "// ...").
	 */
	struct task_struct {
	#ifdef CONFIG_THREAD_INFO_IN_TASK
		/*
		 * For reasons of header soup (see current_thread_info()), this
		 * must be the first element of task_struct.
		 */
		struct thread_info thread_info;
	#endif
		/* -1 unrunnable, 0 runnable, >0 stopped: */
		volatile long state;

		/*
		 * This begins the randomizable portion of task_struct. Only
		 * scheduling-critical items should be added above here.
		 */
		randomized_struct_fields_start

		void *stack;
		refcount_t usage;
		/* Per task flags (PF_*), defined further below: */
		unsigned int flags;
		unsigned int ptrace;

		// ...
		struct sched_info sched_info;

		struct list_head tasks;

		struct mm_struct *mm;
		struct mm_struct *active_mm;

		/* Real parent process: */
		struct task_struct __rcu *real_parent;

		/* Recipient of SIGCHLD, wait4() reports: */
		struct task_struct __rcu *parent;
		struct list_head ptraced;
		struct list_head ptrace_entry;

		/* PID/PID hash table linkage. */
		struct pid *thread_pid;
		struct hlist_node pid_links[PIDTYPE_MAX];
		struct list_head thread_group;
		struct list_head thread_node;

		/* Process credentials: */

		/* Tracer's credentials at attach: */
		const struct cred __rcu *ptracer_cred;

		/* Objective and real subjective task credentials (COW): */
		const struct cred __rcu *real_cred;

		/* Effective (overridable) subjective task credentials (COW): */
		const struct cred __rcu *cred;
		char comm[TASK_COMM_LEN];
		struct seccomp seccomp;
		// ...

		/*
		 * New fields for task_struct should be added above here, so that
		 * they are included in the randomized portion of task_struct.
		 */
		randomized_struct_fields_end

		/* CPU-specific state of this task: */
		struct thread_struct thread;

		/*
		 * WARNING: on x86, 'thread_struct' contains a variable-sized
		 * structure. It MUST be at the end of 'task_struct'.
		 *
		 * Do not put anything below here!
		 */
	};

# MAC

MAC 即 Mandatory Access Control（强制访问控制），是一种将系统中的信息分密级和类别进行管理，以保证每个用户只能访问到那些被标明可以由他访问的信息的访问约束机制。在强制访问控制下，用户 (或其他主体) 与文件 (或其他客体) 都被标记了固定的安全属性 (如安全级、访问权限等)，在每次访问发生时，系统检测安全属性以便确定一个用户是否有权访问该文件。其中 SELinux 和 AppArmor 就是 Linux 中典型的强制访问控制实现。

# DAC

DAC 即 Discretionary Access Control，称为自主访问控制。在 Linux 中最为常见的一种访问控制方案，即用户可以自主选择控制哪些用户可以共享他的文件，有两种自主访问控制策略，分别是文件权限码和访问控制列表 ACL (Access Control List)。

文件权限码

	$ ls -l test
	-rw-r--r-- 1 pan staff 0 Nov 21 14:35 test

分别表示当前用户 (user/owner)、用户组 (group) 和其他用户 (other) 对应的读、写、执行 (rwx) 访问权限，可以参考 chmod(1)。实际上在 Linux 操作系统中在前面还增加了三位，分别是:

S_ISUID (04000): SETUID 位，用于在 execve 系统调用时设置进程的有效用户 ID (effective user ID)；
S_ISGID (02000): SETGID 位，和 SETUID 类似，用于在 execve 系统调用时设置进程的有效组 ID；设置在目录上时，目录中新建的文件会从父目录中继承所属组；
S_ISVTX (01000): sticky bit，即防删除位，防止其他用户删除公共文件，通常用于 /tmp 目录下；

通过文件权限码可以实现一定程度上的自主访问控制，但是对于多用户系统而言只能通过用户组去管理，无法控制某个文件可以让用户 A 访问而不让用户 B 访问。ACL 就是为了实现这个目标而出现的。例如，需要单独给某个用户添加文件的读权限如下:

$ setfacl -m u:evilpan:r /etc/passwd

具体命令可以参考 setfacl(1)，ACL 需要内核和文件系统的支持。

DAC 和 MAC 的对比

在 DAC 模式下，只要相应目录有相应用户的权限，就可以被访问。而在 MAC 模式下，还要受进程允许访问目录范围的限制。

# UID

对于操作系统而言，为了方便管理，用户和组都分别对应数字 ID，即 UID 和 GID。传统上获取 root 权限就是执行下 su 程序，即可获得一个 root shell。一般情况下 su 是一个设置了 SETUID 位的程序，并且 owner 是 root 用户。普通用户执行该程序实际上是对该文件执行了 execve 系统调用，也就是说，内核会根据 SETUID 位来调整当前进程的权限，这主要是通过有效用户 ID 去实现的。

Linux 中的用户 ID 分为 real user id 和 effective user id，这样区分的原因是进程在执行过程中需要动态切换到其他用户，如果只有一个用户 ID，那么切换之后就无法换回原来的用户了。因此前者用来表示进程的真实用户，后者用来表示当前所表示的有效用户。

在内核上面介绍的 task_struct 中有一个 struct cred 字段，该字段对应的结构就包含了当前任务的安全相关上下文信息，其中就有 uid:

	/*
	 * Security context of a task (excerpt; the remaining members are elided
	 * at the "// ..." marker). The members shown here are a usage counter,
	 * optional debugging fields, and the real, saved, effective and
	 * filesystem UID/GID pairs.
	 */
	struct cred {
	atomic_t usage;
	#ifdef CONFIG_DEBUG_CREDENTIALS
	/* The members up to #endif exist only when CONFIG_DEBUG_CREDENTIALS is defined. */
	atomic_t subscribers; /* number of processes subscribed */
	void *put_addr;
	unsigned magic;
	#define CRED_MAGIC 0x43736564
	#define CRED_MAGIC_DEAD 0x44656144
	#endif
	kuid_t uid; /* real UID of the task */
	kgid_t gid; /* real GID of the task */
	kuid_t suid; /* saved UID of the task */
	kgid_t sgid; /* saved GID of the task */
	kuid_t euid; /* effective UID of the task */
	kgid_t egid; /* effective GID of the task */
	kuid_t fsuid; /* UID for VFS ops */
	kgid_t fsgid; /* GID for VFS ops */
	// ...
	} __randomize_layout;

# Capabilities

传统 Linux 执行权限检测主要是基于 UID，而且只有两个分类，即 (effective) UID 为 0 的超级用户和其他普通用户。这样一来就会面临权限划分粒度太粗的问题，比如只想让普通用户可以访问 ping 程序，就需要给 ping 文件加上 SETUID 位，如果该可执行文件的实现存在漏洞，就可能被利用造成权限提升。

因此，从 Linux 2.2 开始，就引入了 capabilities，将超级用户的权限进行切分，并且按需要给普通用户进行分配，解决了传统 UID-0 的局限性。

capabilities 以任务 (线程) 为单位，还是在上面内核的 struct cred 结构体中，其相关的字段为:

	structcred{
	// ...
	kernel_cap_t cap_inheritable;/* caps our children can inherit */
	kernel_cap_t cap_permitted;/* caps we're permitted */
	kernel_cap_t cap_effective;/* caps we can actually use */
	kernel_cap_t cap_bset;/* capability bounding set */
	kernel_cap_t cap_ambient;/* Ambient capability set */
	// ...
	}

从用户空间看，获取、设置线程的系统调用为 capget、capset，如下所示:

	#include<sys/capability.h>

	/* Get the capabilities of a thread: hdrp is the header argument, datap the data argument. */
	int capget(cap_user_header_t hdrp, cap_user_data_t datap);
	/* Set the capabilities of a thread; datap is const-qualified here. */
	int capset(cap_user_header_t hdrp, const cap_user_data_t datap);

参数的结构体定义如下:

	typedef struct__user_cap_header_struct{
	__u32 version;
	int pid;
	}*cap_user_header_t;

	typedef struct__user_cap_data_struct{
	__u32 effective;
	__u32 permitted;
	__u32 inheritable;
	}*cap_user_data_t;

从定义上看，一共有三类 capability，分别是 effective、permitted 和 inheritable，这和 UID 的设计初衷是类似的，因为进程可以被复制 (fork)，因此增加了 inheritable 的控制。对于每一类 capabilities，由于其类型是 __u32，每项 capability 占一个比特位，通过位或方式进行组合，因此最多可以支持 32 种 capability，其中一些常见的包括:

CAP_NET_RAW: 创建和使用 RAW/PACKET socket 的权限以及绑定透明代理地址的权限；
CAP_NET_ADMIN: 各类网络相关的操作，比如网卡接口配置、路由表修改等；
CAP_SETUID: 设置和修改进程 UID 的权限；
CAP_SYS_PTRACE: 使用 ptrace 跟踪任意其他进程的能力；
….

完整的权限列表可以参考 capabilities(7)。

对于系统管理员而言，更多是使用 capsh、getcap、setcap 等命令行工具，不过本质上都是通过 libcap 对系统调用进行封装实现的。

# selinux

ubuntu 安装工具

sudo apt install policycoreutils

检查 selinux 状态

	ubuntu@VM-8-8-ubuntu:~$ sestatus -v
	SELinux status: disabled

setenforce 命令的可能参数有：Enforcing、Permissive、1（强制模式，等同于 Enforcing）或 0（宽容模式，等同于 Permissive）。

	# setenforce 0

或永久修改：

vim /etc/selinux/config

SELinux（Security Enhanced Linux）是由美国 NSA（国安局）和 SCC 开发的 Linux 的一个扩展强制访问控制安全模块，目的是最大限度减少系统中服务进程可访问的资源。Google 在 Android 4.4 上正式添加以 SELinux 为基础的系统安全机制，命名为 **SEAndroid**。SEAndroid 在架构和机制上与 SELinux 完全一样，但基于移动设备的特点，SEAndroid 只是移植了 SELinux 的一个子集。

SELinux 主要作用就是最大限度地减小系统中服务进程可访问的资源（最小权限原则）。

基于 Android 4.3（宽容模式）和 Android 4.4（部分强制模式），在 Android 5.0 及更高版本中，已全面强制执行 SELinux。通过此项变更，Android 已从对有限的一组关键域（installd、netd、vold 和 zygote）强制执行 SELinux 转为对所有域（超过 60 个）强制执行 SELinux。

类型强制执行

SELinux is a labeling system. Every process has a label. Every file/directory object in the operating system has a label. Even network ports, devices, and potentially hostnames have labels assigned to them. We write rules to control the access of a process label to an object label like a file. We call this policy. The kernel enforces the rules. Sometimes this enforcement is called Mandatory Access Control (MAC).

The owner of an object does not have discretion over the security attributes of an object. Standard Linux access control, owner/group + permission flags like rwx, is often called Discretionary Access Control (DAC). SELinux has no concept of UID or ownership of files. Everything is controlled by the labels. This means an SELinux system can be set up without an all-powerful root process.

Note: SELinux does not let you side step DAC Controls. SELinux is a parallel enforcement model. An application has to be allowed by BOTH SELinux and DAC to do certain activities. This can lead to confusion for administrators because the process gets Permission Denied. Administrators see Permission Denied means something is wrong with DAC, not SELinux labels.

更多关于 SELinux 细节：SELinux 可视化指南

# 用户态

在 SELinux 中，访问控制通过 context 来描述访问权限，例如对于文件系统，可以使用 ls -Z 查看文件对应的标签

对于网络端口的标签，可以用 netstat -Z 查看；对于进程标签，则可以通过 ps -Z 查看

context 可以分为几个部分，使用冒号 : 分隔，分别是:

user: 表示 SELinux 用户账号，与 Linux 用户账号不同，前者在 policy 中定义，包含多层级权限；
role: 定义了主体 (subject) 在特定域 (domain) 中可以对客体 (object) 进行的操作；
type: 定义了文件的类型；
sensitivity: 即最后一个字段，表示涉密等级，范围可以从 c0 到 c1023，c3 表示 Top Secret 。该字段仅在 MLS 模式中使用，用于高敏感度的国防军事机构，对于客户端或者一般数据服务器而言只需保留默认值。

对一系列系统资源增加标签后，系统就可以根据标签来判断访问是否应该允许，一个示例的访问拒绝日志如下:

type=1400 audit(18.250:15): avc: denied { getattr } for pid=939 comm="ls" path="/ueventd.rc" dev="rootfs" ino=2842 scontext=u:r:shell:s0 tcontext=u:object_r:rootfs:s0 tclass=file permissive=0

访问权限的判断是在内核中实现的，但是访问规则可以动态生成和更新，内核中只预置了一系列触发点。SELinux 规则 (policy) 通常使用自定义的高级语言去描述，目前正在开发的是 CIL(Common Intermediate Language)，但使用更多的是传统的 MLS Statements，比如访问规则的定义如下:

rule_name source_type target_type:class perm_set;

一个具体的例子:

allow initrc_t acct_exec_t:file { getattr read execute };

表示允许拥有 initrc_t 标签类型的主体访问带有 acct_exec_t 标签的目标文件，访问权限为 getattr、read 和 execute。其中类型是使用 type 关键字定义的，一般使用单独的 file_contexts 文件记录。MLS 的完整语法见 Kernel Policy Language Definition Links。

对于系统管理员而言，常用的相关命令有:

chcon: 修改目标文件的 SELinux 标签；
restorecon: 重新加载 (恢复) 系统文件的 SELinux 标签；
semanage: 实时修改当前系统的 SELinux 规则；
…

使用 MLS 提供的 SELinux Policy 语法，我们可以定义非常细粒度的访问控制，比如根据应用属性甚至签名来控制 IPC 访问。但是与此同时，规则调试也经常困扰 ROM 开发者，有一些脚本比如 audit2allow 、 audit2why 等可以辅助定位和添加规则，不过还是要注意避免添加过度宽泛的权限导致攻击面扩大。

# 内核态

前面说 SELinux 是在内核中进行检查的，那么就以打开文件的操作为例来简单分析下 SELinux 的校验过程。打开文件使用的系统调用是 openat ，该系统调用在内核中的大致调用路径如下:

sys_openat
do_sys_open
do_filp_open
path_openat
do_last
may_open
inode_permission
- do_inode_permission -> generic_permission
- devcgroup_inode_permission
- security_inode_permission

inode_permission 是在文件打开之前检查文件系统 inode 权限的操作，其中包含常规的 DAC 检查、cgroup 权限检查以及我们所关心的 SELinux 检查:

	/*
	 * Call each hook on the security_hook_heads.FUNC list with the given
	 * arguments, stopping at the first hook that returns non-zero.
	 *
	 * Evaluates to that first non-zero result, to 0 when every hook returned
	 * 0, or to IRC when the list is empty.
	 */
	#define call_int_hook(FUNC, IRC, ...) ({ \
		int RC = IRC; \
		do { \
			struct security_hook_list *P; \
			\
			list_for_each_entry(P, &security_hook_heads.FUNC, list) { \
				RC = P->hook.FUNC(__VA_ARGS__); \
				if (RC != 0) \
					break; \
			} \
		} while (0); \
		RC; \
	})

	int security_inode_permission(structinode*inode,intmask)
	{
	if(unlikely(IS_PRIVATE(inode)))
	return 0;
	return call_int_hook(inode_permission,0,inode,mask);
	}

struct security_hook_heads 是一个结构体，其中包含一系列链表，每个链表都对应一类 SELinux hook:

	/*
	 * One list head per hook; call_int_hook() walks the list whose member
	 * name matches the hook being invoked (excerpt, other members elided).
	 */
	struct security_hook_heads {
		struct list_head binder_set_context_mgr;
		struct list_head binder_transaction;
		struct list_head binder_transfer_binder;
		struct list_head binder_transfer_file;
		struct list_head ptrace_access_check;
		struct list_head ptrace_traceme;
		struct list_head capget;
		struct list_head capset;
		// ...
		struct list_head inode_permission;
		// ...
	};

每个链表都是在内核启动时进行初始化的，inode_permission 也不例外。在 security/selinux/hooks.c 中定义了静态数组 selinux_hooks:

	static struct security_hook_listselinux_hooks[]={
	LSM_HOOK_INIT(binder_set_context_mgr,selinux_binder_set_context_mgr),
	LSM_HOOK_INIT(binder_transaction,selinux_binder_transaction),
	LSM_HOOK_INIT(binder_transfer_binder,selinux_binder_transfer_binder),
	LSM_HOOK_INIT(binder_transfer_file,selinux_binder_transfer_file),

	LSM_HOOK_INIT(ptrace_access_check,selinux_ptrace_access_check),
	LSM_HOOK_INIT(ptrace_traceme,selinux_ptrace_traceme),
	LSM_HOOK_INIT(capget,selinux_capget),
	LSM_HOOK_INIT(capset,selinux_capset),
	// ...
	LSM_HOOK_INIT(inode_permission,selinux_inode_permission),
	// ...
	}

因此，selinux_inode_permission 就是实际进行 SELinux 检查的函数:

	/*
	 * SELinux implementation of the inode_permission hook.
	 *
	 * @inode: inode being accessed
	 * @mask:  requested MAY_* access bits
	 *
	 * Returns 0 when there is nothing to check (no read/write/exec/append bit
	 * requested, or the inode is private). Otherwise returns the result of
	 * avc_has_perm_noaudit(), unless auditing is required and
	 * audit_inode_permission() returns non-zero, in which case that value is
	 * returned instead.
	 */
	static int selinux_inode_permission(struct inode *inode, int mask)
	{
		const struct cred *cred = current_cred();
		u32 perms;
		bool from_access;
		unsigned flags = mask & MAY_NOT_BLOCK;
		struct inode_security_struct *isec;
		u32 sid;
		struct av_decision avd;
		int rc, rc2;
		u32 audited, denied;

		/* Capture MAY_ACCESS before the mask is narrowed below. */
		from_access = mask & MAY_ACCESS;
		mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND);

		/* No permission to check. Existence test. */
		if (!mask)
			return 0;

		validate_creds(cred);

		if (unlikely(IS_PRIVATE(inode)))
			return 0;

		/* Translate the inode mode and MAY_* bits into an access vector. */
		perms = file_mask_to_av(inode->i_mode, mask);

		sid = cred_sid(cred);
		isec = inode->i_security;

		rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
		audited = avc_audit_required(perms, &avd, rc,
					     from_access ? FILE__AUDIT_ACCESS : 0,
					     &denied);
		if (likely(!audited))
			return rc;

		/* A non-zero result from the audit step overrides rc. */
		rc2 = audit_inode_permission(inode, perms, audited, denied, rc, flags);
		if (rc2)
			return rc2;
		return rc;
	}

这里有几个值得注意的地方，一个是 selinux_hooks 中注册了很多回调列表，这些模块就是内核中预置的检查点；另外，在 selinux_inode_permission 函数中，使用 file_mask_to_av 来将打开文件的 flag 转换成 SELinux 对应的访问动作 (Access Vector):

	/*
	 * Convert a Linux mode and permission mask to an access vector.
	 *
	 * @mode: inode mode; only used to tell directories from everything else
	 * @mask: MAY_* permission bits
	 *
	 * For non-directories, MAY_EXEC and MAY_READ map to FILE__EXECUTE and
	 * FILE__READ, and MAY_APPEND takes precedence over MAY_WRITE: FILE__WRITE
	 * is only set when MAY_APPEND is absent. For directories, MAY_EXEC,
	 * MAY_WRITE and MAY_READ map to DIR__SEARCH, DIR__WRITE and DIR__READ.
	 *
	 * Returns the OR of the selected access-vector bits (0 if none apply).
	 */
	static inline u32 file_mask_to_av(int mode, int mask)
	{
		u32 av = 0;

		if (!S_ISDIR(mode)) {
			if (mask & MAY_EXEC)
				av |= FILE__EXECUTE;
			if (mask & MAY_READ)
				av |= FILE__READ;

			if (mask & MAY_APPEND)
				av |= FILE__APPEND;
			else if (mask & MAY_WRITE)
				av |= FILE__WRITE;

		} else {
			if (mask & MAY_EXEC)
				av |= DIR__SEARCH;
			if (mask & MAY_WRITE)
				av |= DIR__WRITE;
			if (mask & MAY_READ)
				av |= DIR__READ;
		}

		return av;
	}

这些宏定义在 <build>/security/selinux/av_permissions.h 中，是编译内核时自动生成的。在确认该次访问需要审计后，就接着调用 audit_inode_permission -> slow_avc_audit 进行实际的判断了。因为这类访问控制判断需要频繁调用，出于性能考虑判断过程所使用的访问规则预先编译好并已经加载到内核缓存中，称为 avc (Access Vector Cache)，这也是前面日志中 avc 的来源。

参考：

Your visual how-to guide for SELinux policy enforcement | Opensource.com

Android/Linux Root 的那些事儿 - 有价值炮灰 (evilpan.com)

2023AVSS-SELinux 题目 | LLeaves Blog (lleavesg.top)

Android 中的 SELinux_android selinux-CSDN 博客

一文彻底明白 linux 中的 selinux 到底是什么 - 知乎 (zhihu.com)