linux系统中每个进程由一个进程id标识,在内核中对应一个task_struct结构的进程描述符,系统中所有进程的task_struct通过链表链接在一起,在内核中,经常需要通过进程pid来获取进程描述符,例如kill命令;最简单的方法可以通过遍历task_struct链表并对比pid的值来获取,但这样效率太低,尤其当系统中运行很多个进程的时候。

linux内核通过pids散列表来解决这一问题,能快速的通过进程PID获取到进程描述符。

PID散列表包含4个表,因为进程描述符包含了表示不同类型PID的字段,每种类型的PID需要自己的散列表。
进程pid散列表

在task_struct结构体中:

struct task_struct {
	pid_t pid;
	pid_t tgid;

	/* PID/PID hash table linkage. */
	struct pid_link pids[PIDTYPE_MAX];    //一个进程ID可能是多种身份,比如Session ID,进程组ID, 进程ID,所以指向多个pid节点
	struct list_head thread_group;
}
enum pid_type
{
	PIDTYPE_PID,
	PIDTYPE_PGID,
	PIDTYPE_SID,
	PIDTYPE_MAX
};
pid结构体和task_struct结构体之间的联系,通过pid_link结构体;
/*
 * What is struct pid?
 *
 * A struct pid is the kernel's internal notion of a process identifier.
 * It refers to individual tasks, process groups, and sessions.  While
 * there are processes attached to it the struct pid lives in a hash
 * table, so it and then the processes that it refers to can be found
 * quickly from the numeric pid value.  The attached processes may be
 * quickly accessed by following pointers from struct pid.
 *
 * Storing pid_t values in the kernel and referring to them later has a
 * problem.  The process originally with that pid may have exited and the
 * pid allocator wrapped, and another process could have come along
 * and been assigned that pid.
 *
 * Referring to user space processes by holding a reference to struct
 * task_struct has a problem.  When the user space process exits
 * the now useless task_struct is still kept.  A task_struct plus a
 * stack consumes around 10K of low kernel memory.  More precisely
 * this is THREAD_SIZE + sizeof(struct task_struct).  By comparison
 * a struct pid is about 64 bytes.
 *
 * Holding a reference to struct pid solves both of these problems.
 * It is small so holding a reference does not consume a lot of
 * resources, and since a new struct pid is allocated when the numeric pid
 * value is reused (when pids wrap around) we don't mistakenly refer to new
 * processes.
 */
/*
 * struct upid is used to get the id of the struct pid, as it is
 * seen in particular namespace. Later the struct pid is found with
 * find_pid_ns() using the int nr and struct pid_namespace *ns.
 */

struct upid {
	/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
	int nr;
	struct pid_namespace *ns;
	struct hlist_node pid_chain;
};

struct pid
{
	atomic_t count;
	unsigned int level;
	/* lists of tasks that use this pid */
	struct hlist_head tasks[PIDTYPE_MAX];
	struct rcu_head rcu;
	struct upid numbers[1];
};

struct pid_link
{
	struct hlist_node node;
	struct pid *pid;
};

看完基本的关联之后,利用pid是如何找到进程描述符的呢?

直接贴代码!

/*
 * Must be called under rcu_read_lock().
 */
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
	rcu_lockdep_assert(rcu_read_lock_held(),
			   "find_task_by_pid_ns() needs rcu_read_lock()"
			   " protection");
	return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
}

#define pid_hashfn(nr, ns)	\
	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
static struct hlist_head *pid_hash;
static unsigned int pidhash_shift = 4;

struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
	struct upid *pnr;

	hlist_for_each_entry_rcu(pnr,
			&pid_hash[pid_hashfn(nr, ns)], pid_chain)
		if (pnr->nr == nr && pnr->ns == ns)
			return container_of(pnr, struct pid,
					numbers[ns->level]);

	return NULL;
}

find_pid_ns() 在pid_hash的哈希表中根据哈希函数得到pid所在的链表头部,然后遍历得到 id 和 ns 都匹对的pid结构体指针;


struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
	struct task_struct *result = NULL;
	if (pid) {
		struct hlist_node *first;
		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
					      lockdep_tasklist_lock_is_held());
		if (first)         //pid中的task[type] 与task_struct.pid[type].node  指向的是同一个节点
			result = hlist_entry(first, struct task_struct, pids[(type)].node);   //node实体在task_struct结构中,所以可以利用first指针得到task_struct结构体指针
	}
	return result;
}

pid_task()函数中rcu_dereference_check()可以忽略,可以看做:first = hlist_first_rcu(&pid->tasks[type]); 

#define hlist_first_rcu(head)	(*((struct hlist_node __rcu **)(&(head)->first)))
#define hlist_entry(ptr, type, member) container_of(ptr,type,member)

pid_hash的初始化函数:

/*
 * The pid hash table is scaled according to the amount of memory in the
 * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
 * more.
 */
void __init pidhash_init(void)
{
	unsigned int i, pidhash_size;

	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
					   HASH_EARLY | HASH_SMALL,
					   &pidhash_shift, NULL,
					   0, 4096);
	pidhash_size = 1U << pidhash_shift;

	for (i = 0; i < pidhash_size; i++)
		INIT_HLIST_HEAD(&pid_hash[i]);
}

从pidhash_init函数中可以看出,pid_hash至少含有16项,最多4096项,它是系统中所有进程的哈希表,根据id分配,但此哈希表与PGIDTGID并无直接关系;


/*
 * allocate a large system hash table from bootmem
 * - it is assumed that the hash table must contain an exact power-of-2
 *   quantity of entries
 * - limit is the number of hash buckets, not the total allocation size
 */
void *__init alloc_large_system_hash(const char *tablename,
				     unsigned long bucketsize,
				     unsigned long numentries,
				     int scale,
				     int flags,
				     unsigned int *_hash_shift,
				     unsigned int *_hash_mask,
				     unsigned long low_limit,
				     unsigned long high_limit)


注:所用内核版本为3.10

引用:http://blog.chinaunix.net/uid-20196318-id-134921.html


Logo

更多推荐