In this section we continue with the Linux 2.4 kernel and look at how a series of functions resolves a path name to the target node.


3.3.1) Next, let's look at the code of cached_lookup() (fs/namei.c):


[path_walk() >> cached_lookup()]

/*
 * Internal lookup() using the new generic dcache.
 * SMP-safe
 *
 * Given the parent and the name, look the dentry up in the dcache; if it is
 * found and still valid, return it, otherwise return NULL.
 */

static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	struct dentry * dentry = d_lookup(parent, name);

	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {	/* the last test checks that the function pointer is non-NULL */
		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {	/* validate the cached dentry; important for NFS, where the cached information may have gone stale */
			dput(dentry);
			dentry = NULL;
		}
	}
	return dentry;
}

      This function does its work mainly through d_lookup(); the code is in fs/dcache.c.

[path_walk() >> cached_lookup() >> d_lookup()]
/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 *
 * Searches the children of the parent dentry for the name in question. If
 * the dentry is found its reference count is incremented and the dentry
 * is returned. The caller must use d_put to free the entry when it has
 * finished using it. %NULL is returned on failure.
 *
 * This is the real lookup routine: given the parent, it searches for the child
 * dentry with the given name. On success the reference count is incremented and
 * the dentry is returned; otherwise NULL is returned. In essence this is a
 * hash-table lookup.
 */

struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct list_head *head = d_hash(parent, hash);	/* locate the hash bucket */
	struct list_head *tmp;

	spin_lock(&dcache_lock);
	tmp = head->next;
	for (;;) {	/* because of hash collisions we still have to walk the bucket to pin down the exact dentry */
		struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);	/* tmp is the pointer, d_hash the member name */
		if (tmp == head)
			break;	/* circular doubly linked list: back at the head means the search is over */
		tmp = tmp->next;	/* loop variable: advance to the next entry */
		if (dentry->d_name.hash != hash)	/* d_name is a qstr */
			continue;	/* check the hash value: different names may hash to the same bucket */
		if (dentry->d_parent != parent)
			continue;	/* check the parent: different directories may contain children with the same name */
		if (parent->d_op && parent->d_op->d_compare) {	/* check the name */
			if (parent->d_op->d_compare(parent, &dentry->d_name, name))
				continue;
		} else {
			if (dentry->d_name.len != len)
				continue;
			if (memcmp(dentry->d_name.name, str, len))
				continue;
		}
		__dget_locked(dentry);
		dentry->d_vfs_flags |= DCACHE_REFERENCED;
		spin_unlock(&dcache_lock);
		return dentry;	/* the dentry was found in memory */
	}
	spin_unlock(&dcache_lock);
	return NULL;	/* the dentry is not in memory */
}
      First of all, one might expect the bucket to be located directly from the hash value, but here the hash is mixed further with the parent; the function is as follows:

[path_walk() >> cached_lookup() >> d_lookup() >> d_hash()]

// Hash together with the parent; the value returned is the address of the hash
// bucket, not an index. Mixing in the parent's address makes the hash more
// discriminating.

Note: d_hash() does not return an int; it returns an address, not an index.

static inline struct list_head * d_hash(struct dentry * parent, unsigned long hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash ^ (hash >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}

      The corresponding code of list_entry() is:

141 #define list_entry(ptr, type, member) \
142 	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))

      This macro recovers the address of the containing structure from the address of one of its embedded members; it takes a moment to digest. A detailed explanation can be found here:

节点地址的函数list_entry()原理详解
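
To make the macro concrete, here is a minimal user-space sketch (hypothetical struct names, not kernel code) that mirrors its pointer arithmetic: the address of an embedded member minus that member's offset inside the structure gives the address of the structure itself.

#include <stdio.h>

/* Same idea as the kernel macro: subtract the offset of the member from the
 * member's address to get back to the start of the containing structure. */
#define my_list_entry(ptr, type, member) \
	((type *)((char *)(ptr) - (unsigned long)(&((type *)0)->member)))

struct list_head { struct list_head *next, *prev; };

struct demo_dentry {			/* hypothetical container */
	int              d_count;
	struct list_head d_hash;	/* embedded list node, as in struct dentry */
};

int main(void)
{
	struct demo_dentry d;
	struct list_head *p = &d.d_hash;	/* what a hash-queue walk hands us */
	struct demo_dentry *back = my_list_entry(p, struct demo_dentry, d_hash);

	printf("%d\n", back == &d);		/* prints 1: we recovered the container */
	return 0;
}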

      Once the right queue head has been found, the for loop that scans it is straightforward. The only special point is that a concrete filesystem may supply its own name-comparison function through its dentry_operations; if it does not, memcmp() is used.
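
As a hedged illustration of that hook (the names and the case-insensitive policy below are invented, not taken from any real filesystem), a filesystem could plug in its own comparison roughly like this:

/* Sketch only: a case-insensitive comparison such as a FAT-like filesystem
 * might install.  d_lookup() treats a non-zero return as "no match", so we
 * return 0 on a match.  tolower() stands in for whatever case folding the
 * filesystem actually uses (in the kernel it would come from linux/ctype.h). */
static int demo_ci_compare(struct dentry *parent, struct qstr *a, struct qstr *b)
{
	unsigned int i;

	if (a->len != b->len)
		return 1;
	for (i = 0; i < a->len; i++)
		if (tolower(a->name[i]) != tolower(b->name[i]))
			return 1;
	return 0;
}

static struct dentry_operations demo_dentry_ops = {
	d_compare:	demo_ci_compare,	/* picked up by the branch in d_lookup() above */
};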

      Back in cached_lookup(): a concrete filesystem may, through its dentry_operations, provide a function that validates and post-processes the dentry that was found; if validation fails, the structure is removed from the hash table by d_invalidate(). The reason is that in NFS, for example, if a remote process is the dentry's only user and has not touched it for a long time, the dentry has to be rebuilt from the contents of the parent directory on disk (or on the server). The validation function is supplied through the d_revalidate pointer in the dentry_operations structure, and depending on the result cached_lookup() finally returns either a dentry pointer or NULL. Many filesystems, ext2 included, do not provide a dentry_operations structure at all. With that, cached_lookup() is done.
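
To picture the d_revalidate hook, here is a deliberately simplified sketch (hypothetical names; a real network filesystem would query the server instead of the trivial test used here):

/* Sketch: return non-zero if the cached dentry can still be trusted, 0 if the
 * callers above should drop it via d_invalidate()/dput(). */
static int demo_revalidate(struct dentry *dentry, int flags)
{
	/* A real NFS-like filesystem would re-check with the server here;
	 * this stand-in just accepts any dentry that still has an inode. */
	return dentry->d_inode != NULL;
}

static struct dentry_operations demo_net_dentry_ops = {
	d_revalidate:	demo_revalidate,
};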

3.3.2) Next, let's look at the code of real_lookup():

[path_walk() >> real_lookup()]
/*
 * This is called when everything else fails, and we actually have
 * to go to the low-level filesystem to find out what we should do..
 *
 * We get the directory semaphore, and after getting that we also
 * make sure that nobody added the entry to the dcache in the meantime..
 * SMP-safe
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
{	/* given the parent, the name and the flags, find the dentry, add it to the dcache and return it */
	struct dentry * result;
	struct inode *dir = parent->d_inode;

	down(&dir->i_sem);	/* enter the critical region: serialize lookups/creations in this directory */
	/*
	 * First re-do the cached lookup just in case it was created
	 * while we waited for the directory semaphore..
	 *
	 * FIXME! This could use version numbering or similar to
	 * avoid unnecessary cache lookups.
	 */
	result = d_lookup(parent, name);	/* the dentry may have been built by somebody else while we slept */
	if (!result) {
		struct dentry * dentry = d_alloc(parent, name);
		result = ERR_PTR(-ENOMEM);
		if (dentry) {
			lock_kernel();
			result = dir->i_op->lookup(dir, dentry);	/* search the parent directory on disk and fill in the dentry */
			unlock_kernel();
			if (result)
				dput(dentry);	/* lookup failed: release the dentry we just allocated */
			else
				result = dentry;	/* lookup succeeded */
		}
		up(&dir->i_sem);
		return result;
	}

	/*
	 * Uhhuh! Nasty case: the cache was re-populated while
	 * we waited on the semaphore. Need to revalidate.
	 */
	up(&dir->i_sem);
	if (result->d_op && result->d_op->d_revalidate) {
		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
			dput(result);
			result = ERR_PTR(-ENOENT);
		}
	}
	return result;
}

Note: for the down()/up() functions and the use of mutual-exclusion semaphores, see the reference linked here.
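
The pattern real_lookup() relies on is just the classic sleeping-mutex bracket around the per-directory critical region; schematically (an illustrative fragment, not complete code):

/* Serialize all lookups/creations in one directory on its inode's semaphore. */
down(&dir->i_sem);	/* may sleep until the semaphore becomes free            */
/* ... re-check the dcache, then call the filesystem's ->lookup() ...          */
up(&dir->i_sem);	/* release the semaphore; wake up one waiter, if any      */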


I) To build a new dentry, storage must first be allocated and initialized for it; this is done by d_alloc(), whose code is in fs/dcache.c:


#define NAME_ALLOC_LEN(len)	((len+16) & ~15)

/**
*d_alloc - allocate a dcache entry
*@parent: parent of entry to allocate
*@name: qstr of the name
*
*Allocates a dentry. It returns %NULL if there is insufficient memory
*available. On a success the dentry is returned. The name passed in is
*copied and the copy passed in may be reused after this call.
*/

struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
{	/* allocate room from the dentry cache, initialize the dentry and return it */
	char * str;
	struct dentry *dentry;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);	/* allocated from a dedicated slab cache; memory allocation is discussed later */
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		str = kmalloc(NAME_ALLOC_LEN(name->len), GFP_KERNEL);
		if (!str) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else
		str = dentry->d_iname;	/* the name is short enough to be stored inline in d_iname */

	memcpy(str, name->name, name->len);	/* d_name.name always points at this string */
	str[name->len] = 0;

	/* 22 #define atomic_read(v)		((v)->counter)       */
	/* 23 #define atomic_set(v,i)	((v)->counter = (i)) */

	atomic_set(&dentry->d_count, 1);
	dentry->d_vfs_flags = 0;
	dentry->d_flags = 0;
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_name.name = str;
	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	dentry->d_mounted = 0;
	INIT_LIST_HEAD(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);	/* initializing the structure means initializing every one of its members */
	if (parent) {
		dentry->d_parent = dget(parent);
		dentry->d_sb = parent->d_sb;	/* inherit the superblock from the parent */
		spin_lock(&dcache_lock);
		list_add(&dentry->d_child, &parent->d_subdirs);
		spin_unlock(&dcache_lock);
	} else
		INIT_LIST_HEAD(&dentry->d_child);

	dentry_stat.nr_dentry++;
	return dentry;
}



II) Searching on disk is done through the structures reached from the inode's i_op jump table. For ext2, the jump table for directory inodes is ext2_dir_inode_operations, defined in fs/ext2/namei.c; the lookup method there is ext2_lookup():

struct inode_operations ext2_dir_inode_operations = {
	create:		ext2_create,
	lookup:		ext2_lookup,
	link:		ext2_link,
	unlink:		ext2_unlink,
	symlink:	ext2_symlink,
	mkdir:		ext2_mkdir,
	rmdir:		ext2_rmdir,
	mknod:		ext2_mknod,
	rename:		ext2_rename,
};

      The function in question is ext2_lookup(), still in fs/ext2/namei.c:

/*
 * Methods themselves.
 */
[path_walk() >> real_lookup() >> ext2_lookup()]
163 static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry)
164 {	/* search the parent directory on disk for the entry and fill in the relevant information */
165 	struct inode * inode;
166 	struct ext2_dir_entry_2 * de;
167 	struct buffer_head * bh;
168 
169 	if (dentry->d_name.len > EXT2_NAME_LEN)
170 		return ERR_PTR(-ENAMETOOLONG);
171 
172 	bh = ext2_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de);
173 	inode = NULL;
174 	if (bh) {
175 		unsigned long ino = le32_to_cpu(de->inode);
176 		brelse (bh);
177 		inode = iget(dir->i_sb, ino);	/* read the inode from disk by inode number and build the in-memory inode */
178 
179 		if (!inode)
180 			return ERR_PTR(-EACCES);
181 	}
182 	d_add(dentry, inode);	/* finish setting up the dentry and hang it on the appropriate queues */
183 	return NULL;
184 }

[path_walk() >> real_lookup() >> ext2_lookup() >> ext2_find_entry()]

// This part touches on file I/O and device drivers; we will analyse it later.

 52 /*
 53  * ext2_find_entry()
 54  *
 55  * finds an entry in the specified directory with the wanted name. It
 56  * returns the cache buffer in which the entry was found, and the entry
 57  * itself (as a parameter - res_dir). It does NOT read the inode of the
 58  * entry - you'll have to do that yourself if you want to.
 59  */
 60 static struct buffer_head * ext2_find_entry (struct inode * dir,
 61 			const char * const name, int namelen,
 62 			struct ext2_dir_entry_2 ** res_dir)
 63 {	/* search the given directory for an entry with the given name; return the buffer in which it was found (and the entry itself via res_dir), but do NOT read its inode */
64 struct super_block * sb;
65 struct buffer_head * bh_use[NAMEI_RA_SIZE];
66 struct buffer_head * bh_read[NAMEI_RA_SIZE];
67 unsigned long offset;
68 int block, toread, i, err;
69
70 *res_dir = NULL;
71 sb = dir->i_sb;
72
73 if (namelen > EXT2_NAME_LEN)
74 return NULL;
75
 76 	memset (bh_use, 0, sizeof (bh_use));	/* sizeof(bh_use) is the size of the whole array of buffer_head pointers */
77 toread = 0;
78 for (block = 0; block < NAMEI_RA_SIZE; ++block) {
79 struct buffer_head * bh;
80
 81 if ((block << EXT2_BLOCK_SIZE_BITS (sb)) >= dir->i_size)
82 break;
83 bh = ext2_getblk (dir, block, 0, &err);
84 bh_use[block] = bh;
85 if (bh && !buffer_uptodate(bh))
86 bh_read[toread++] = bh;
87 }
88
 89 for (block = 0, offset = 0; offset < dir->i_size; block++) {
90 struct buffer_head * bh;
91 struct ext2_dir_entry_2 * de;
92 char * dlimit;
93
 94 if ((block % NAMEI_RA_BLOCKS) == 0 && toread) {
95 ll_rw_block (READ, toread, bh_read);
96 toread = 0;
97 }
98 bh = bh_use[block % NAMEI_RA_SIZE];
99 if (!bh) {
100#if 0
101 ext2_error (sb, "ext2_find_entry",
102 "directory #%lu contains ahole at offset %lu",
103 dir->i_ino, offset);
104#endif
105 offset += sb->s_blocksize;
106 continue;
107 }
108 wait_on_buffer (bh);
109 if (!buffer_uptodate(bh)) {
110 /*
111 * read error: all bets are off
112 */
113 break;
114 }
115
116 de = (struct ext2_dir_entry_2 *) bh->b_data;
117 dlimit = bh->b_data + sb->s_blocksize;
118 while ((char *) de < dlimit) {
119 /* this code is executed quadratically often */
120 /* do minimal checking `by hand' */
121 int de_len;
122
123 if ((char *) de + namelen <= dlimit &&
124 ext2_match (namelen, name, de)) {
125 /* found a match -
126    just to be sure, do a full check */
127 if (!ext2_check_dir_entry("ext2_find_entry",
128     dir, de, bh, offset))
129 goto failure;
130 for (i = 0; i < NAMEI_RA_SIZE; ++i) {
131 if (bh_use[i] != bh)
132 brelse (bh_use[i]);
133 }
134 *res_dir = de;
135 return bh;
136 }
137 /* prevent looping on a bad block */
138 de_len = le16_to_cpu(de->rec_len);
139 if (de_len <= 0)
140 goto failure;
141 offset += de_len;
142 de = (struct ext2_dir_entry_2 *)
143 ((char *) de + de_len);
144 }
145
146 brelse (bh);
147 if (((block + NAMEI_RA_SIZE) << EXT2_BLOCK_SIZE_BITS (sb)) >=
148     dir->i_size)
149 bh = NULL;
150 else
151 bh = ext2_getblk (dir, block + NAMEI_RA_SIZE, 0, &err);
152 bh_use[block % NAMEI_RA_SIZE] = bh;
153 if (bh && !buffer_uptodate(bh))
154 bh_read[toread++] = bh;
155 }
156
157 failure:
158 for (i = 0; i < NAMEI_RA_SIZE; ++i)
159 brelse (bh_use[i]);
160 return NULL;
161}

      Back in ext2_lookup(), the next step is to obtain the corresponding inode structure, via iget(), from the inode number that was just found. Here iget() is an inline function defined in include/linux/fs.h:

static inline struct inode *iget(struct super_block *sb, unsigned long ino)
{
	return iget4(sb, ino, NULL, NULL);
}

      Let's follow iget4(), in fs/inode.c:

962 struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
963 {	/* given the superblock and the inode number, return the inode; if it is not in memory, read the information from disk and build the in-memory inode */
964 	struct list_head * head = inode_hashtable + hash(sb, ino);	/* inode numbers are only unique per device, so the superblock address goes into the hash */
965 	struct inode * inode;
966 
967 	spin_lock(&inode_lock);
968 	inode = find_inode(sb, ino, head, find_actor, opaque);	/* is the inode already in memory? (a hash-table search) */
969 	if (inode) {
970 		__iget(inode);
971 		spin_unlock(&inode_lock);
972 		wait_on_inode(inode);
973 		return inode;
974 	}
975 	spin_unlock(&inode_lock);
976 
977 	/*
978 	 * get_new_inode() will do the right thing, re-trying the search
979 	 * in case it had to block at any point.
980 	 */
981 	return get_new_inode(sb, ino, head, find_actor, opaque);	/* not in memory: read it in from disk */
982 }
983 

      Inodes, too, are cached in memory. There is a hash table, inode_hashtable, and every inode that has been set up is hung on one of its queues through the i_hash field (also a list_head) in the inode structure. iget4() first searches the table with find_inode(); if the inode is found, __iget() increments its reference count.
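
The hash() used here mixes the superblock address into the inode number, so that equal inode numbers on different devices fall into different buckets; in fs/inode.c it is roughly of the following shape (quoted from memory, so treat it as a sketch rather than an exact listing). Note the parallel with d_hash() above, which mixes in the parent dentry for the same reason.

static inline unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp = hashval + ((unsigned long) sb / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> I_HASHBITS);
	return tmp & I_HASHMASK;
}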

Let's now look at the functions iget4() calls:

(1)find_inode:


 556 /*
 557  * Called with the inode lock held.
 558  * NOTE: we are not increasing the inode-refcount, you must call __iget()
 559  * by hand after calling find_inode now! This simplifies iunique and won't
 560  * add any additional branch in the common code.
 561  */
 562 static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
 563 {	/* actually, this is just a hash-table search */
 564         struct list_head *tmp;
 565         struct inode * inode;
 566 
 567         tmp = head;
 568         for (;;) {
 569                 tmp = tmp->next;
 570                 inode = NULL;
 571                 if (tmp == head)
 572                         break;
 573                 inode = list_entry(tmp, struct inode, i_hash);
 574                 if (inode->i_ino != ino)
 575                         continue;
 576                 if (inode->i_sb != sb)
 577                         continue;
 578                 if (find_actor && !find_actor(inode, ino, opaque))
 579                         continue;
 580                 break;
 581         }
 582         return inode;
 583 }

  About find_inode_t:

    typedef int (*find_inode_t)(struct inode *, unsigned long, void *);

Note how a complex declaration like this is read: mentally replace find_inode_t with an ordinary variable name, and the typedef then says that find_inode_t names the type that variable would have had. In this case it is a pointer to a function.
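
To make the typedef concrete, here is a hedged sketch of supplying one's own find_actor to iget4() (the actor's name and its trivial test are made up):

/* A hypothetical actor: iget4() calls it, via find_inode(), for every inode in
 * the hash bucket whose number matches; returning 0 rejects the candidate. */
static int demo_actor(struct inode *inode, unsigned long ino, void *opaque)
{
	return 1;	/* a real actor would inspect inode and/or opaque here */
}

/* usage: inode = iget4(sb, ino, demo_actor, my_private_data);
 * plain iget() is simply iget4(sb, ino, NULL, NULL), as shown above. */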

(2)__iget:

 180 static inline void __iget(struct inode * inode)
 181 {
 182         if (atomic_read(&inode->i_count)) {	/* i_count != 0 */
 183                 atomic_inc(&inode->i_count);
 184                 return;
 185         }
 186         atomic_inc(&inode->i_count);		/* i_count was 0 */
 187         if (!(inode->i_state & I_DIRTY)) {
 188                 list_del(&inode->i_list);	/* take the inode off whatever list it is on */
 189                 list_add(&inode->i_list, &inode_in_use);	/* and put it on the in-use list */
 190         }
 191         inodes_stat.nr_unused--;
 192 }

For the meaning of the i_state field, see my earlier post in this series: inode结构体成员详解.

(3)wait_on_inode:

 167 static inline void wait_on_inode(struct inode *inode)
 168 {
 169         if (inode->i_state & I_LOCK)
 170                 __wait_on_inode(inode);
 171 }


Going one level deeper:

152 static void __wait_on_inode(struct inode * inode)
 153 {
 154         DECLARE_WAITQUEUE(wait, current);	/* build a wait-queue entry for the current process */
 155 
 156         add_wait_queue(&inode->i_wait, &wait);
 157 repeat:
 158         set_current_state(TASK_UNINTERRUPTIBLE);
 159         if (inode->i_state & I_LOCK) {
 160                 schedule();
 161                 goto repeat;
 162         }
 163         remove_wait_queue(&inode->i_wait, &wait);
 164         current->state = TASK_RUNNING;
 165 }

This part touches on process scheduling and wait queues; we will come back to it later.

(4) get_new_inode

      The code is in fs/inode.c:

[path_walk() >> real_lookup() >> ext2_lookup() >> iget() >> iget4() >> get_new_inode()]

649 /*
650  * This is called without the inode lock held.. Be careful.
651  *
652  * We no longer cache the sb_flags in i_flags - see fs.h
653  *	-- rmk@arm.uk.linux.org
654  */
655 static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
656 {	/* compare this with the allocation and setup of a dentry earlier: the inode is allocated and then initialized */
657 	struct inode * inode;
658 
659 	inode = alloc_inode();	/* allocate the memory */
660 	if (inode) {
661 		struct inode * old;
662 
663 		spin_lock(&inode_lock);	/* take the lock */
664 		/* We released the lock, so.. */
665 		old = find_inode(sb, ino, head, find_actor, opaque);	/* sb and ino together identify an inode system-wide; see the earlier discussion of this function */
666 		if (!old) {	/* nothing found, so we really do have to bring the inode in from disk */
667 			inodes_stat.nr_inodes++;
668 			list_add(&inode->i_list, &inode_in_use);	/* add the first argument right after the second */
669 			list_add(&inode->i_hash, head);
670 			inode->i_sb = sb;
671 			inode->i_dev = sb->s_dev;
672 			inode->i_ino = ino;
673 			inode->i_flags = 0;
674 			atomic_set(&inode->i_count, 1);
675 			inode->i_state = I_LOCK;	/* the disk transfer is in progress */
676 			spin_unlock(&inode_lock);
677 
678 			clean_inode(inode);
679 			sb->s_op->read_inode(inode);	/* actually read the inode from disk; this may go down to the disk driver */
680 
681 			/*
682 			 * This is special! We do not need the spinlock
683 			 * when clearing I_LOCK, because we're guaranteed
684 			 * that nobody else tries to do anything about the
685 			 * state of the inode when it is locked, as we
686 			 * just created it (so there can be no old holders
687 			 * that haven't tested I_LOCK).
688 			 */
689 			inode->i_state &= ~I_LOCK;	/* the I/O transfer is finished */
690 			wake_up(&inode->i_wait);
691 
692 			return inode;
693 		}
694 
695 		/*
696 		 * Uhhuh, somebody else created the same inode under
697 		 * us. Use the old inode instead of the one we just
698 		 * allocated.
699 		 */
700 		__iget(old);	/* analogous to dget() */
701 		spin_unlock(&inode_lock);
702 		destroy_inode(inode);	/* somebody else has already built it */
703 		inode = old;
704 		wait_on_inode(inode);
705 	}
706 	return inode;
707 }

      Analysis of the functions it calls:

      (4.1)alloc_inode:

 

 78 #define alloc_inode() \
  79          ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
This involves memory management; we will return to it later.

       (4.2)clean_inode:

 585 /*
 586  * This just initializes the inode fields
 587  * to known values before returning the inode..
 588  *
 589  * i_sb, i_ino, i_count, i_state and the lists have
 590  * been initialized elsewhere..
 591  */
 592 static void clean_inode(struct inode *inode)
 593 {
 594         static struct address_space_operations empty_aops;
 595         static struct inode_operations empty_iops;
 596         static struct file_operations empty_fops;
 597         memset(&inode->u, 0, sizeof(inode->u));
 598         inode->i_sock = 0;
 599         inode->i_op = &empty_iops;
 600         inode->i_fop = &empty_fops;
 601         inode->i_nlink = 1;
 602         atomic_set(&inode->i_writecount, 0);
 603         inode->i_size = 0;
 604         inode->i_generation = 0;
 605         memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
 606         inode->i_pipe = NULL;
 607         inode->i_bdev = NULL;
 608         inode->i_data.a_ops = &empty_aops;
 609         inode->i_data.host = inode;
 610         inode->i_mapping = &inode->i_data;
 611 }
There is not much here that needs explanation.


   (4.3) For some filesystems this step literally "reads in" an inode; for others it means turning the relevant information on disk into an inode.

      The function that reads the inode in is supplied through the read_inode pointer of the super_operations jump table. The super_block structure of every device has a pointer s_op that points to its particular jump table; for ext2 that table is ext2_sops, and the function is ext2_read_inode() (see fs/ext2/super.c):

150 static struct super_operations ext2_sops = {
151 	read_inode:	ext2_read_inode,
152 	write_inode:	ext2_write_inode,
153 	put_inode:	ext2_put_inode,
154 	delete_inode:	ext2_delete_inode,
155 	put_super:	ext2_put_super,
156 	write_super:	ext2_write_super,
157 	statfs:		ext2_statfs,
158 	remount_fs:	ext2_remount,
159 };

      The code of ext2_read_inode() is in fs/ext2/inode.c:

[path_walk() > real_lookup() > ext2_lookup() > iget() > get_new_inode() > ext2_read_inode()]

961 void ext2_read_inode (struct inode * inode)
962 {	/* read the corresponding information from disk and build the in-memory inode; the "input" is the inode whose i_sb and i_ino were already set up by get_new_inode() */
963 	struct buffer_head * bh;
964 	struct ext2_inode * raw_inode;
965 	unsigned long block_group;
966 	unsigned long group_desc;
967 	unsigned long desc;
968 	unsigned long block;
969 	unsigned long offset;
970 	struct ext2_group_desc * gdp;
971 
972 	if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO &&
973 	     inode->i_ino != EXT2_ACL_DATA_INO &&
974 	     inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) ||
975 	    inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) {	/* sanity check on the inode number */
976 		ext2_error (inode->i_sb, "ext2_read_inode",
977 			    "bad inode number: %lu", inode->i_ino);
978 		goto bad_inode;
979 	}
980 	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);	/* block-group number = (ino - 1) / inodes per group */
981 	if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) {	/* the block-group number is out of range */
982 		ext2_error (inode->i_sb, "ext2_read_inode",
983 			    "group >= groups count");
984 		goto bad_inode;
985 	}
986 	group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb);	/* which block of the group-descriptor table holds this group's descriptor */
987 	desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1);	/* which descriptor it is within that block */
988 	bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc];	/* look the descriptor block up in the buffer cache */
989 	if (!bh) {	/* no buffer head */
990 		ext2_error (inode->i_sb, "ext2_read_inode",
991 			    "Descriptor not loaded");
992 		goto bad_inode;
993 	}
994 
995 	gdp = (struct ext2_group_desc *) bh->b_data;
996 	/*
997 	 * Figure out the offset within the block group inode table
998 	 */
999 	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
1000 		EXT2_INODE_SIZE(inode->i_sb);	/* byte offset of this inode within the group's inode table */
1001 	block = le32_to_cpu(gdp[desc].bg_inode_table) +
1002 		(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
1003 	if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) {
1004 		ext2_error (inode->i_sb, "ext2_read_inode",
1005 			    "unable to read inode block - "
1006 			    "inode=%lu, block=%lu", inode->i_ino, block);
1007 		goto bad_inode;
1008 	}
1009 	offset &= (EXT2_BLOCK_SIZE(inode->i_sb) - 1);
1010 	raw_inode = (struct ext2_inode *) (bh->b_data + offset);
1011 
1012 	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
1013 	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
1014 	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
1015 	if (!(test_opt (inode->i_sb, NO_UID32))) {
1016 		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
1017 		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
1018 	}
1019 	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
1020 	inode->i_size = le32_to_cpu(raw_inode->i_size);
1021 	inode->i_atime = le32_to_cpu(raw_inode->i_atime);
1022 	inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
1023 	inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
1024 	inode->u.ext2_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
1025 	/* We now have enough fields to check if the inode was active or not.
1026 	 * This is needed because nfsd might try to access dead inodes
1027 	 * the test is that same one that e2fsck uses
1028 	 * NeilBrown 1999oct15
1029 	 */
1030 	if (inode->i_nlink == 0 && (inode->i_mode == 0 || inode->u.ext2_i.i_dtime)) {
1031 		/* this inode is deleted */
1032 		brelse (bh);
1033 		goto bad_inode;
1034 	}
1035 	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
1036 	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
1037 	inode->i_version = ++event;
1038 	inode->u.ext2_i.i_flags = le32_to_cpu(raw_inode->i_flags);
1039 	inode->u.ext2_i.i_faddr = le32_to_cpu(raw_inode->i_faddr);
1040 	inode->u.ext2_i.i_frag_no = raw_inode->i_frag;
1041 	inode->u.ext2_i.i_frag_size = raw_inode->i_fsize;
1042 	inode->u.ext2_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
1043 	if (S_ISDIR(inode->i_mode))
1044 		inode->u.ext2_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
1045 	else {
1046 		inode->u.ext2_i.i_high_size = le32_to_cpu(raw_inode->i_size_high);
1047 		inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
1048 	}
1049 	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
1050 	inode->u.ext2_i.i_block_group = block_group;
1051 
1052 	/*
1053 	 * NOTE! The in-memory inode i_data array is in little-endian order
1054 	 * even on big-endian machines: we do NOT byteswap the block numbers!
1055 	 */
1056 	for (block = 0; block < EXT2_N_BLOCKS; block++)
1057 		inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];

 The next step is to locate this group's descriptor within the group-descriptor table. Because the descriptor table may occupy several data blocks, we have to work out which block of the table holds the descriptor and which descriptor it is within that block. group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb) divides the block-group number by the number of descriptors per block, giving the block of the descriptor table in which this group's descriptor lives. Each group descriptor is 32 bytes, so a 1 KB block can hold 32 of them.

·      "And"-ing the block-group number with (descriptors per block - 1) gives the position of the descriptor within that block: desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1).

·      With group_desc and desc in hand, finding the descriptor in the buffer cache is easy: bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc] uses the s_group_desc[] array to obtain the buffer head of the block that holds the descriptor, and gdp = (struct ext2_group_desc *) bh->b_data then gives access to the data itself.

·      Once the group descriptor has been found, its bg_inode_table field leads to the start of the group's inode table (a worked numeric sketch follows after this list):

    offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *

        EXT2_INODE_SIZE(inode->i_sb)  computes the byte offset of the inode within the inode table;

    block = le32_to_cpu(gdp[desc].bg_inode_table) +

        (offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb))  computes the number of the block containing the inode.

·      le32_to_cpu() and le16_to_cpu() rearrange the data as required by the particular CPU; when accessing an ext2 filesystem on an i386 they do nothing at all. Different processors order the bytes of a multi-byte value differently, the so-called big-endian and little-endian conventions. The i386 is little-endian: when it stores the 16-bit value 0x1234 the bytes actually written are 0x34 0x12, and the same applies to 32-bit values. Inode numbers and block sizes are stored on disk as 32-bit or 16-bit unsigned integers, and the same disk may be mounted on either a little-endian or a big-endian machine, so one byte order has to be chosen as the on-disk standard. Ext2 uses little-endian, which is why le32_to_cpu() and le16_to_cpu() perform no conversion at all on i386.

·      Once the block containing the inode is known, it can be read in through the device driver (bread() in the listing above). What comes off the disk is an ext2_inode structure, whose definition we have already seen. The information in it is raw and unprocessed, which is why the code calls it raw_inode: raw_inode = (struct ext2_inode *) (bh->b_data + offset).

·      In contrast with the on-disk ext2_inode, the information in the in-memory VFS inode falls into two parts. One part belongs to the VFS layer and applies to every filesystem; the other belongs to the specific filesystem and lives in the union inside the inode, interpreted differently by each filesystem. For ext2 this part is the ext2_inode_info structure shown below. A node that represents a symbolic link has no file contents (data) of its own, so this space can be reused to hold the path name of the link target; the i_data[] array is 15 × 4 = 60 bytes, and although a file name may be up to 255 bytes, link targets are usually short, so the 60-byte limit rarely causes problems. The inode->u.* assignments in the code set up exactly this ext2-specific part.

·      Next, the inode_operations and file_operations pointers in the inode structure are set according to the information just read in, completing the connection between the concrete filesystem and the virtual filesystem (VFS).

·      The 2.4 kernel does not yet support access control lists (ACLs); the code merely reserves a place for them and does nothing with them for now.

·      Finally, the mode field of the inode is examined to see whether the node is a regular file (S_ISREG), a directory (S_ISDIR), a symbolic link (S_ISLNK) or some other special file, and it is set up or handled accordingly. For an ext2 directory node, i_op and i_fop are set to ext2_dir_inode_operations and ext2_dir_operations. For a regular ext2 file, besides i_op and i_fop, a third pointer a_ops is set, pointing to an address_space_operations structure used for mapping or buffering the file in memory. Special files are checked and handled by init_special_inode().

      This walk through reading an inode shows the order of the search: to reach a given inode we must first find its group descriptor, use the descriptor to find the inode table, and only then locate the inode within that table. Once the inode has been found on disk it is read into memory and stored in the corresponding fields of the VFS inode. Working through this example is a good way to get a concrete feel for how the data structures introduced earlier are actually used.
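
Here is the worked numeric sketch promised above. The geometry is assumed purely for illustration (1 KB blocks, 128-byte on-disk inodes, 2048 inodes per group, 32 descriptors per block); the real values come from the superblock, and bg_inode_table stands in for the field read out of the group descriptor.

/* Worked example: locate inode number 5000 with the assumed geometry above. */
void locate_inode_5000(void)
{
	unsigned long ino = 5000;
	unsigned long bg_inode_table = 1234;	/* assumed value of gdp[desc].bg_inode_table */

	unsigned long block_group = (ino - 1) / 2048;		/* = 2                          */
	unsigned long group_desc  = block_group / 32;		/* = 0: first descriptor block  */
	unsigned long desc        = block_group % 32;		/* = 2: third descriptor there  */
	unsigned long offset      = ((ino - 1) % 2048) * 128;	/* = 903 * 128 = 115584 bytes
								   into group 2's inode table  */
	unsigned long block       = bg_inode_table + (offset >> 10);	/* = 1234 + 112 = 1346  */

	offset &= 1023;		/* = 896: byte offset inside block 1346, i.e. its 8th inode */
	(void) group_desc; (void) desc; (void) block; (void) offset;	/* silence unused warnings */
}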


      On an ext2 disk some inode numbers have special uses; they are defined in include/linux/ext2_fs.h:

55 /*
56  * Special inode numbers
57  */
58 #define EXT2_BAD_INO		 1	/* Bad blocks inode */
59 #define EXT2_ROOT_INO		 2	/* Root inode */
60 #define EXT2_ACL_IDX_INO	 3	/* ACL inode */
61 #define EXT2_ACL_DATA_INO	 4	/* ACL inode */
62 #define EXT2_BOOT_LOADER_INO	 5	/* Boot loader inode */
63 #define EXT2_UNDEL_DIR_INO	 6	/* Undelete directory inode */
64 
65 /* First non-reserved inode for old ext2 filesystems */
66 #define EXT2_GOOD_OLD_FIRST_INO	11
67 
68 /*
69  * The second extended file system magic number
70  */
71 #define EXT2_SUPER_MAGIC	0xEF53
72 
73 /*
74  * Maximal count of links to a file
75  */
76 #define EXT2_LINK_MAX		32000
77 
78 /*
79  * Macro-instructions used to manage several block sizes
80  */
81 #define EXT2_MIN_BLOCK_SIZE	1024
82 #define EXT2_MAX_BLOCK_SIZE	4096
83 #define EXT2_MIN_BLOCK_LOG_SIZE	10


      These inodes are reserved for the system; they are reached not through directory entries but directly through the defined inode numbers. The device's super_block structure provides the number of the first inode available for ordinary use as well as the total number of inodes, and both parameters are used in the range check on the inode number shown above.

      Conceptually, an ext2 disk consists of the boot block and the superblock, with everything else being inodes and data. In practice the partition is first divided into a number of "block groups", and each block group is then split into an inode area and a data area; information about the block groups is kept in the superblock and the group-descriptor table. So, given an inode number, the path to the file's metadata is: inode number → block-group number plus offset within the group → block containing the inode → read that block through the device driver. What is read from disk is the ext2_inode structure, raw and unprocessed, hence raw_inode in the code. The in-memory inode has two parts: one belongs to the VFS layer, the other to the concrete filesystem, namely the union; for ext2 that part is an ext2_inode_info structure, defined in include/linux/ext2_fs_i.h:

22 struct ext2_inode_info {
23 	__u32	i_data[15];
24 	__u32	i_flags;
25 	__u32	i_faddr;
26 	__u8	i_frag_no;
27 	__u8	i_frag_size;
28 	__u16	i_osync;
29 	__u32	i_file_acl;	/* access control list */
30 	__u32	i_dir_acl;
31 	__u32	i_dtime;
32 	__u32	not_used_1;	/* FIX: not used/ 2.2 placeholder */
33 	__u32	i_block_group;
34 	__u32	i_next_alloc_block;
35 	__u32	i_next_alloc_goal;
36 	__u32	i_prealloc_block;
37 	__u32	i_prealloc_count;
38 	__u32	i_high_size;
39 	int	i_new_inode:1;	/* Is a freshly allocated inode */
40 };

      The i_data[] array in this structure holds block numbers that point, directly or indirectly, to the disk blocks storing the file's contents. A node that represents a symbolic link has no real contents of its own, so this space can conveniently be used to store the path name of the link target. The for loop in the code copies these 15 integers into the union inside the inode structure.
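
For a "fast" symbolic link, i.e. one whose target is short enough to fit in those 60 bytes, the idea can be sketched as follows (an illustrative fragment with an invented helper name; the real code path goes through ext2_fast_symlink_inode_operations, which appears in the listing below):

/* Sketch: read the target of an ext2 fast symlink straight out of the inode.
 * In this case i_data[] holds no block numbers at all -- it is simply the
 * NUL-terminated target path, at most about 60 characters long. */
static const char *demo_fast_symlink_target(struct inode *inode)
{
	return (const char *) inode->u.ext2_i.i_data;
}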

      Let's continue in the code of ext2_read_inode() (fs/ext2/inode.c):

[path_walk() > real_lookup() > ext2_lookup() > iget() > get_new_inode() > ext2_read_inode()]

1059 	if (inode->i_ino == EXT2_ACL_IDX_INO ||
1060 	    inode->i_ino == EXT2_ACL_DATA_INO)
1061 		/* Nothing to do */ ;
1062 	else if (S_ISREG(inode->i_mode)) {
1063 		inode->i_op = &ext2_file_inode_operations;
1064 		inode->i_fop = &ext2_file_operations;
1065 		inode->i_mapping->a_ops = &ext2_aops;
1066 	} else if (S_ISDIR(inode->i_mode)) {
1067 		inode->i_op = &ext2_dir_inode_operations;
1068 		inode->i_fop = &ext2_dir_operations;
1069 	} else if (S_ISLNK(inode->i_mode)) {
1070 		if (!inode->i_blocks)
1071 			inode->i_op = &ext2_fast_symlink_inode_operations;
1072 		else {
1073 			inode->i_op = &page_symlink_inode_operations;
1074 			inode->i_mapping->a_ops = &ext2_aops;
1075 		}
1076 	} else
1077 		init_special_inode(inode, inode->i_mode,
1078 				   le32_to_cpu(raw_inode->i_block[0]));
1079 	brelse (bh);
1080 	inode->i_attr_flags = 0;
1081 	if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
1082 		inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS;
1083 		inode->i_flags |= S_SYNC;
1084 	}
1085 	if (inode->u.ext2_i.i_flags & EXT2_APPEND_FL) {
1086 		inode->i_attr_flags |= ATTR_FLAG_APPEND;
1087 		inode->i_flags |= S_APPEND;
1088 	}
1089 	if (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FL) {
1090 		inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE;
1091 		inode->i_flags |= S_IMMUTABLE;
1092 	}
1093 	if (inode->u.ext2_i.i_flags & EXT2_NOATIME_FL) {
1094 		inode->i_attr_flags |= ATTR_FLAG_NOATIME;
1095 		inode->i_flags |= S_NOATIME;
1096 	}
1097 	return;
1098 
1099 bad_inode:
1100 	make_bad_inode(inode);
1101 	return;
1102 }

      Next, based on the information the inode supplies, the inode_operations and file_operations pointers in the inode structure are set, completing the connection between the concrete filesystem and the virtual filesystem.

      The mode field of the inode is examined to decide whether the node is a regular file (S_ISREG), a directory, and so on, and it is set up or handled accordingly. For a directory node, i_op and i_fop are set to point at ext2_dir_inode_operations and ext2_dir_operations respectively; for a regular file there is an additional pointer, a_ops, which points to an address_space_operations structure used for mapping or buffering the file in memory; special files are checked and handled by init_special_inode().

      Once the inode structure has been found or created, control returns to ext2_lookup(), where d_add() still has to hook the inode and the dentry together and hang the dentry on one of the hash-table queues. d_add() is defined in include/linux/dcache.h:

200 static __inline__ void d_add(struct dentry * entry, struct inode * inode)
201 {
202 	d_instantiate(entry, inode);	/* attach the dentry to the in-memory inode */
203 	d_rehash(entry);	/* hang the dentry on one of the in-memory hash queues */
204 }

The function d_instantiate() hooks the dentry structure and the inode structure to each other; the code is in fs/dcache.c:

663 void d_instantiate(struct dentry *entry, struct inode * inode)
664 {	/* several dentries may correspond to one inode */
665 	spin_lock(&dcache_lock);	/* spin lock guarding against concurrent access on SMP */
666 	if (inode)
667 		list_add(&entry->d_alias, &inode->i_dentry);	/* link the dentry into the inode's i_dentry list through d_alias; one inode may have several dentries */
668 	entry->d_inode = inode;	/* each dentry points to exactly one inode */
669 	spin_unlock(&dcache_lock);
670 }

      The relationship between the two structures runs both ways: the d_inode pointer in the dentry points to the inode, a one-to-one relationship, while going from the inode to its dentries can be one-to-many.
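
As a hedged sketch of that one-to-many direction (hypothetical helper, locking omitted), the i_dentry list can be walked with the same list_entry() macro we met earlier, this time through the d_alias member:

/* Sketch: enumerate every dentry (every name, e.g. every hard link) that
 * currently refers to this inode.  The kernel itself would hold dcache_lock
 * while walking the list. */
static void demo_show_aliases(struct inode *inode)
{
	struct list_head *p;

	for (p = inode->i_dentry.next; p != &inode->i_dentry; p = p->next) {
		struct dentry *alias = list_entry(p, struct dentry, d_alias);
		/* alias->d_name.name is one of the names that reach this inode */
		(void) alias;
	}
}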

      d_rehash() then hangs the dentry structure on the hash table; the code is in the same file:

847 /**
848  * d_rehash - add an entry back to the hash
849  * @entry: dentry to add to the hash
850  *
851  * Adds a dentry to the hash according to its name.
852  */
853 
854 void d_rehash(struct dentry * entry)
855 {
856 	struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
857 	spin_lock(&dcache_lock);
858 	list_add(&entry->d_hash, list);	/* link the dentry into its hash queue */
859 	spin_unlock(&dcache_lock);
860 }

      Back in real_lookup(): the required dentry structure has now been found or created, and control returns to path_walk() (fs/namei.c, line 497).

Now that the dentry for the current component exists, path_walk() calls d_mountpoint() to check whether it is a mount point.
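
d_mountpoint() itself is a one-liner: it simply tests the d_mounted count that d_alloc() initialised to zero above. In include/linux/dcache.h it looks roughly like this (quoted from memory, so treat it as a sketch):

static __inline__ int d_mountpoint(struct dentry *dentry)
{
	return dentry->d_mounted != 0;
}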


Source: 谁不小心's CSDN blog, "ext2 源代码解析之 "从路径名到目标结点" (二)".
