Linux pid_hash散列表

linux系统中每个进程由一个进程id标识，在内核中对应一个task_struct结构的进程描述符，系统中所有进程的task_struct通过链表链接在一起，在内核中，经常需要通过进程pid来获取进程描述符，例如kill命令；最简单的方法可以通过遍历task_struct链表并对比pid的值来获取，但这样效率太低，尤其当系统中运行很多个进程的时候。linux内核通过pids散列表来解决这一问题

bysun2013

3509人浏览 · 2013-11-02 13:52:42

bysun2013 · 2013-11-02 13:52:42 发布

linux系统中每个进程由一个进程id标识，在内核中对应一个task_struct结构的进程描述符，系统中所有进程的task_struct通过链表链接在一起，在内核中，经常需要通过进程pid来获取进程描述符，例如kill命令；最简单的方法可以通过遍历task_struct链表并对比pid的值来获取，但这样效率太低，尤其当系统中运行很多个进程的时候。

linux内核通过pids散列表来解决这一问题，能够快速地通过进程PID获取到对应的进程描述符。

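在较早的2.6内核中，PID散列表包含4个表，因为进程描述符包含了表示不同类型PID的字段，每种类型的PID需要自己的散列表；而在本文所用的3.10内核中，只有一个全局的pid_hash散列表，不同类型的PID由pid_type枚举（PIDTYPE_PID、PIDTYPE_PGID、PIDTYPE_SID）以及struct pid中的tasks[PIDTYPE_MAX]链表来区分。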

在task_struct结构体中：

/*
 * Excerpt of struct task_struct showing only the PID-related members
 * discussed in this article.
 */
struct task_struct {
	pid_t pid;
	pid_t tgid;

	/*
	 * PID/PID hash table linkage.
	 * A task can be attached under several identities at once (process ID,
	 * process group ID, session ID), so it carries one link per pid type.
	 */
	struct pid_link pids[PIDTYPE_MAX];
	struct list_head thread_group;
};

/*
 * Kinds of identity a struct pid can represent. The values index
 * task_struct.pids[] and struct pid.tasks[].
 */
enum pid_type
{
	PIDTYPE_PID,	/* individual task (process ID) */
	PIDTYPE_PGID,	/* process group ID */
	PIDTYPE_SID,	/* session ID */
	PIDTYPE_MAX	/* number of pid types; used as the array size */
};

pid结构体和task_struct结构体之间通过pid_link结构体建立联系：

/*
 * What is struct pid?
 *
 * A struct pid is the kernel's internal notion of a process identifier.
 * It refers to individual tasks, process groups, and sessions.  While
 * there are processes attached to it the struct pid lives in a hash
 * table, so it and then the processes that it refers to can be found
 * quickly from the numeric pid value.  The attached processes may be
 * quickly accessed by following pointers from struct pid.
 *
 * Storing pid_t values in the kernel and referring to them later has a
 * problem.  The process originally with that pid may have exited and the
 * pid allocator wrapped, and another process could have come along
 * and been assigned that pid.
 *
 * Referring to user space processes by holding a reference to struct
 * task_struct has a problem.  When the user space process exits
 * the now useless task_struct is still kept.  A task_struct plus a
 * stack consumes around 10K of low kernel memory.  More precisely
 * this is THREAD_SIZE + sizeof(struct task_struct).  By comparison
 * a struct pid is about 64 bytes.
 *
 * Holding a reference to struct pid solves both of these problems.
 * It is small so holding a reference does not consume a lot of
 * resources, and since a new struct pid is allocated when the numeric pid
 * value is reused (when pids wrap around) we don't mistakenly refer to new
 * processes.
 */
/*
 * struct upid is used to get the id of the struct pid, as it is
 * seen in particular namespace. Later the struct pid is found with
 * find_pid_ns() using the int nr and struct pid_namespace *ns.
 */

/* One (numeric id, namespace) view of a struct pid; this is the hashed object. */
struct upid {
	/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
	int nr;				/* numeric id as seen in @ns */
	struct pid_namespace *ns;	/* namespace in which @nr is valid */
	struct hlist_node pid_chain;	/* link in the pid_hash bucket chain */
};

/*
 * Kernel-internal process identifier; tasks attach to it through
 * pid_link nodes chained on tasks[].
 */
struct pid
{
	atomic_t count;		/* reference count on this struct pid */
	unsigned int level;	/* NOTE(review): presumably the namespace depth of this pid - confirm */
	/* lists of tasks that use this pid */
	struct hlist_head tasks[PIDTYPE_MAX];
	struct rcu_head rcu;	/* NOTE(review): presumably used for RCU-deferred freeing - confirm */
	/*
	 * One upid per namespace level; find_pid_ns() indexes this array with
	 * ns->level. Declared with one element; NOTE(review): presumably
	 * over-allocated for deeper namespace levels - confirm.
	 */
	struct upid numbers[1];
};

/* Connects one task_struct (via task_struct.pids[type]) to one struct pid. */
struct pid_link
{
	struct hlist_node node;	/* entry on pid->tasks[type]; pid_task() walks back from it to the task */
	struct pid *pid;	/* the struct pid this link is attached to */
};

看完基本的关联之后，利用pid是如何找到进程描述符的呢？

直接贴代码！

/*
 * Find the task whose PIDTYPE_PID identity is @nr inside namespace @ns.
 * Returns NULL when no such pid is hashed or no task is attached to it.
 *
 * Must be called under rcu_read_lock().
 */
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
	struct pid *pid;

	rcu_lockdep_assert(rcu_read_lock_held(),
			   "find_task_by_pid_ns() needs rcu_read_lock()"
			   " protection");

	/* Step 1: numeric id -> struct pid; step 2: struct pid -> task. */
	pid = find_pid_ns(nr, ns);
	return pid_task(pid, PIDTYPE_PID);
}

/*
 * Map a (numeric id, namespace pointer) pair to a bucket index in pid_hash.
 * Both arguments are parenthesised so that expression arguments
 * (e.g. "a ? b : c") are cast and summed as a whole.
 */
#define pid_hashfn(nr, ns)	\
	hash_long((unsigned long)(nr) + (unsigned long)(ns), pidhash_shift)
/* Bucket array of the pid hash table; assigned in pidhash_init(). */
static struct hlist_head *pid_hash;
/* log2 of the bucket count; starts at 4 (16 buckets) and is passed to the allocator in pidhash_init(). */
static unsigned int pidhash_shift = 4;

struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
	struct upid *pnr;

	hlist_for_each_entry_rcu(pnr,
			&pid_hash[pid_hashfn(nr, ns)], pid_chain)
		if (pnr->nr == nr && pnr->ns == ns)
			return container_of(pnr, struct pid,
					numbers[ns->level]);

	return NULL;
}

find_pid_ns() 先根据哈希函数在pid_hash哈希表中找到对应的链表头，然后遍历该链表，找到 nr 和 ns 都匹配的upid，再通过container_of得到其所在的pid结构体指针；

/*
 * Return the first task attached to @pid under identity @type, or NULL
 * when @pid is NULL or has no task of that type.
 */
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
	struct hlist_node *head_node;

	if (!pid)
		return NULL;

	head_node = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
					  lockdep_tasklist_lock_is_held());
	if (!head_node)
		return NULL;

	/*
	 * pid->tasks[type] chains the task_struct.pids[type].node members, so
	 * the node lives inside the task_struct and the enclosing task can be
	 * recovered from the node pointer.
	 */
	return hlist_entry(head_node, struct task_struct, pids[(type)].node);
}

pid_task()函数中的rcu_dereference_check()用于在RCU保护下读取指针并做lockdep检查，理解查找流程时可以暂时忽略，近似看做：first = hlist_first_rcu(&pid->tasks[type]);

/* Access the ->first pointer of an hlist head as an __rcu-annotated pointer. */
#define hlist_first_rcu(head)	(*((struct hlist_node __rcu **)(&(head)->first)))

/* Convert a pointer to an embedded hlist node (@member of @type) into a pointer to the enclosing @type. */
#define hlist_entry(ptr, type, member) container_of(ptr,type,member)

pid_hash的初始化函数：

/*
 * Allocate the pid hash table and initialise every bucket as an empty list.
 *
 * The pid hash table is scaled according to the amount of memory in the
 * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
 * more.
 */
void __init pidhash_init(void)
{
	unsigned int nr_buckets;
	unsigned int slot;

	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
					   HASH_EARLY | HASH_SMALL,
					   &pidhash_shift, NULL,
					   0, 4096);

	/* pidhash_shift was handed to the allocator above; bucket count is 2^shift. */
	nr_buckets = 1U << pidhash_shift;

	slot = 0;
	while (slot < nr_buckets) {
		INIT_HLIST_HEAD(&pid_hash[slot]);
		slot++;
	}
}

从pidhash_init函数中可以看出，pid_hash至少含有16项，最多4096项；它是系统中所有pid（struct upid）共用的哈希表，以（nr, ns）为键进行散列。该哈希表本身并不区分PID、PGID、SID等类型，与PGID、TGID没有直接关系，不同类型是通过struct pid中的tasks[type]链表来区分的；

/*
 * allocate a large system hash table from bootmem
 * - it is assumed that the hash table must contain an exact power-of-2
 *   quantity of entries
 * - limit is the number of hash buckets, not the total allocation size
 *
 * Prototype only: the function body is not reproduced here.
 */
void *__init alloc_large_system_hash(const char *tablename,
				     unsigned long bucketsize,
				     unsigned long numentries,
				     int scale,
				     int flags,
				     unsigned int *_hash_shift,
				     unsigned int *_hash_mask,
				     unsigned long low_limit,
				     unsigned long high_limit);

注：所用内核版本为3.10

引用：http://blog.chinaunix.net/uid-20196318-id-134921.html

Linux

更多推荐

网卡速率和双工模式的配置

http://linux.chinaitlab.com/system/792187.html1、mii-tool 配置网络设备协商方式的工具； 1.1 mii-tool 介绍； mii-tool - view, manipulate media-independent interface status （mii-tool 是查看，管理介质的网络接口的状态）

Linux

Linux虚拟文件系统之文件系统卸载（sys_umount())

Linux中卸载文件系统由umount系统调用实现，入口函数为sys_umount()。相较于文件系统的安装，卸载较为简单，下面是具体的实现。1. /*sys_umount系统调用*/2. SYSCALL_DEFINE2(umount, char __user *, name, int, flags)3. {4.struct path path;

Linux

Linux系统下超级终端Minicom的使用方法（例如：连接交换机，路由器等）转http://baike.baidu.com/view/2911642.htm?fr=ala0_1

Linux系统下超级终端Minicom的使用方法 　　Linux下的Minicom的功能与Windows下的超级终端功能相似，适于通过超级终端对设备进行管理以及对嵌入式操作系统进行升级，现写出Minicom的使用手册： 　　1．启动minicom 　　以root权限登录系统 　　使用命令 　　minicom -s 则minicom启动，屏

Linux

所有评论(0)

查看更多评论

bysun2013

@sun123bingyong

已为社区贡献2条内容