/*
 * dm_io_async_bvec - issue a dm-io request whose data buffer is a bio_vec array.
 *
 * @num_regions: number of entries in @where
 * @where:       target regions (struct dm_io_region on kernels >= 2.6.26,
 *               struct io_region on older ones)
 * @rw:          value stored in the request's bi_rw field
 * @bvec:        bio_vec array used as the data buffer
 * @fn:          completion callback, stored in notify.fn
 * @context:     opaque pointer, stored in notify.context
 *
 * Builds a dm_io_request of memory type DM_IO_BVEC on behalf of
 * flashcache_io_client and hands it to dm_io(). Returns whatever dm_io()
 * returns.
 */
int dm_io_async_bvec(unsigned int num_regions,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
			    struct dm_io_region *where,
#else
			    struct io_region *where,
#endif
			    int rw,
			    struct bio_vec *bvec, io_notify_fn fn,
			    void *context)
{
	/*
	 * Use an initializer instead of member-by-member assignment so that
	 * every field not listed here (e.g. mem.offset, which is not needed
	 * for DM_IO_BVEC) is zeroed rather than left as stack garbage.
	 */
	struct dm_io_request iorq = {
		.bi_rw = rw,
		.mem = {
			.type = DM_IO_BVEC,
			.ptr = { .bvec = bvec },
		},
		.notify = {
			.fn = fn,
			.context = context,
		},
		.client = flashcache_io_client,
	};

	/* No sync_error_bits pointer is supplied: pass NULL. */
	return dm_io(&iorq, num_regions, where, NULL);
}

The user must set up an io_region structure to describe the desired location of the I/O. Each io_region indicates a block-device along with the starting sector and size of the region.

但是不同的内核版本io_region 结构体的表示不同。

2.6.26以后的版本用dm_io_region表示如下:

/*
 * Describes one I/O target region: a block device together with the
 * starting sector and the size of the region on that device.
 */
struct dm_io_region {
	struct block_device *bdev;	/* device the region lives on */
	sector_t sector;		/* starting sector of the region */
	sector_t count;		/* If this is zero the region is ignored. */
};

2.6.26之前的版本用io_region表示如下:

/*
 * Pre-2.6.26 name for the I/O region descriptor: a block device together
 * with the starting sector and the size of the region on that device.
 */
struct io_region {
     struct block_device *bdev;	/* device the region lives on */
     sector_t sector;		/* starting sector of the region */
     sector_t count;		/* size of the region */
 };


虽然形式不同,但是里面的内容是一样的,都含有一个指向block_device的指针,以及区域的起始扇区和区域的大小。


bio_vec结构体如下:

/* One segment of a bio: a page plus the byte range used within it. */
struct bio_vec {
struct page	*bv_page;// pointer to the page descriptor of the page frame holding the segment
unsigned int	bv_len;// length of the segment in bytes
unsigned int	bv_offset;// offset of the segment's data within the page frame
};


io_notify_fn是一个回调函数指针的类型,其定义如下:

/*
 * Completion callback type for dm-io requests. It receives an error value
 * (a per-region bitset rather than a plain error code) and the context
 * pointer that was supplied with the request.
 */
typedef void (*io_notify_fn)(unsigned long error, void *context);
The "error" parameter in this callback is a bitset (instead of a simple error value). In the case of a write I/O to multiple regions, this bitset allows dm-io to indicate success or failure on each individual region.


dm_io_request结构体如下:(通过dm_io_request结构来封装请求的类型,如果设置了dm_io_notify.fn则是异步IO,否则是同步IO。)

/*
 * Bundles the parameters of one dm_io() call: the direction, the memory
 * used as the data buffer, the completion notification and the client.
 * The request is synchronous when notify.fn is NULL and asynchronous
 * otherwise.
 */
struct dm_io_request {
	int bi_rw;			/* READ|WRITE - not READA */
	struct dm_io_memory mem;	/* Memory to use for io */
	struct dm_io_notify notify;	/* Synchronous if notify.fn is NULL */
	struct dm_io_client *client;	/* Client memory handler */
};


从上面的分析可以看出,dm_io_async_bvec通过io_notify_fn fn来确定是同步操作还是异步操作,通过bio_vec *bvec确定dm_io的服务类型,dm_io有3种服务类型:

//The first I/O service type takes a list of memory pages as the data buffer for the I/O, along with an offset into the first page.

   /* Node of a singly linked list of pages used as the I/O data buffer. */
   struct page_list { struct page_list *next;	/* next node in the list */
      struct page *page; };			/* page held by this node */

   /*
    * Page-list service: the data buffer is the page list pl, starting at
    * byte offset "offset" into its first page. The sync variant takes an
    * error_bits out-pointer; the async variant takes a completion callback
    * fn and a context pointer instead.
    */
   int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw, struct page_list *pl, unsigned int offset,
                  unsigned long *error_bits); 
   int dm_io_async(unsigned int num_regions, struct io_region *where, int rw, struct page_list *pl, unsigned int offset,
                   io_notify_fn fn, void *context);

//The second I/O service type takes an array of bio vectors as the data buffer for the I/O. This service can be handy if the caller has a pre-assembled bio, but wants to direct different portions of the bio to different devices.

   /*
    * Bio-vector service: the data buffer is the bio_vec array bvec. The
    * sync variant takes an error_bits out-pointer; the async variant takes
    * a completion callback fn and a context pointer instead.
    */
   int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw, struct bio_vec *bvec,
                       unsigned long *error_bits); 
   int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw, struct bio_vec *bvec,
                        io_notify_fn fn, void *context);


//The third I/O service type takes a pointer to a vmalloc'd memory buffer as the data buffer for the I/O. This service can be handy if the caller needs to do I/O to a large region but doesn't want to allocate a large number of individual memory pages.
   
   /*
    * Vmalloc service: the data buffer is the vmalloc'd memory pointed to by
    * data. The sync variant takes an error_bits out-pointer; the async
    * variant takes a completion callback fn and a context pointer instead.
    */
   int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw, void *data, unsigned long *error_bits); 
   int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw, void *data, io_notify_fn fn, void *context);

dm_io_async_bvec通过dm_io_request封装请求之后,确定了请求的各种类型,然后由dm_io()函数来完成操作。dm_io函数如下:

int dm_io(struct dm_io_request *io_req, unsigned num_regions,
	  struct dm_io_region *where, unsigned long *sync_error_bits)
{
	int r;
	struct dpages dp;

	r = dp_init(io_req, &dp);
	if (r)
		return r;

	if (!io_req->notify.fn)
		return sync_io(io_req->client, num_regions, where,
			       io_req->bi_rw, &dp, sync_error_bits);

	return async_io(io_req->client, num_regions, where, io_req->bi_rw,
			&dp, io_req->notify.fn, io_req->notify.context);
}

dpages结构体如下:

/*
 * Iterator over the data buffer of a request: get_page reports the current
 * region and next_page advances to the following one. The concrete buffer
 * kind is hidden behind the two function pointers and the context fields.
 */
struct dpages {
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);// function pointer: fetches the current region; the region type is determined by the context
	void (*next_page)(struct dpages *dp);// function pointer: moves on to the region following the current one

	unsigned context_u;
	void *context_ptr;// private data whose actual type depends on the context, i.e. on the region type above
};
dpages结构体为I/O提供了一种获取下一个目标页的抽象(provides an abstraction for getting a new destination page for io)。


dp_init()函数如下:

/*
 * Initialise the page iterator dp according to the memory type recorded in
 * the request. Returns 0 on success, -EINVAL for an unknown memory type.
 */
static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
{
	int status = 0;

	switch (io_req->mem.type) {
	case DM_IO_PAGE_LIST:
		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
		break;

	case DM_IO_BVEC:
		/* This is the type set up by dm_io_async_bvec(). */
		bvec_dp_init(dp, io_req->mem.ptr.bvec);
		break;

	case DM_IO_VMA:
		vm_dp_init(dp, io_req->mem.ptr.vma);
		break;

	case DM_IO_KMEM:
		km_dp_init(dp, io_req->mem.ptr.addr);
		break;

	default:
		/* Unrecognised memory type: reject the request. */
		status = -EINVAL;
		break;
	}

	return status;
}

bvec_dp_init()函数如下:

/*
 * Set up dp to iterate over a bio_vec array: the cursor starts at bvec and
 * the accessors are the bio_vec-specific get/next helpers.
 */
static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
{
	/* The iterator's private cursor is the current bio_vec. */
	dp->context_ptr = bvec;

	/* Accessors that read the current bio_vec and step to the next one. */
	dp->next_page = bvec_next_page;
	dp->get_page = bvec_get_page;
}


bvec_get_page()函数如下:

static void bvec_get_page(struct dpages *dp,
		  struct page **p, unsigned long *len, unsigned *offset)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	*p = bvec->bv_page;
	*len = bvec->bv_len;
	*offset = bvec->bv_offset;
}

bvec_next_page()函数如下:

/* Advance the iterator's cursor to the next element of the bio_vec array. */
static void bvec_next_page(struct dpages *dp)
{
	struct bio_vec *cur = dp->context_ptr;

	dp->context_ptr = &cur[1];
}

处理完dm_io的服务类型之后,再根据io_req->notify.fn是否设置,来确定dm_io的操作类型是同步的还是异步的。

同步操作调用sync_io;异步操作调用async_io。

sync_io()函数如下:

/*
 * Synchronous path of dm_io(): dispatch the I/O and wait in this task until
 * io.count has dropped to zero.
 *
 * A request with more than one region must be a write; anything else is
 * rejected with a warning and -EIO.
 *
 * If error_bits is non-NULL the accumulated io.error_bits value is stored
 * through it. Returns 0 when no error bit is set, -EIO otherwise.
 */
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct dm_io_region *where, int rw, struct dpages *dp,
		   unsigned long *error_bits)
{
	struct io io;// on-stack bookkeeping that further wraps the dm_io_request parameters

	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {// dm_io cannot read from multiple io_regions
		WARN_ON(1);
		return -EIO;
	}

retry:
	/* (Re)initialise the bookkeeping; also reached again via "goto retry". */
	io.error_bits = 0;
	io.eopnotsupp_bits = 0;
	atomic_set(&io.count, 1); /* see dispatch_io() */
	io.sleeper = current;	/* record the waiting task (this one) */
	io.client = client;

	dispatch_io(rw, num_regions, where, dp, &io, 1);

	/*
	 * Wait until io.count reaches zero. The task state is set before the
	 * count is tested.
	 * NOTE(review): this looks like the usual kernel sleep idiom, presumably
	 * so that a wakeup arriving between the test and io_schedule() is not
	 * lost; the code that decrements the count is not shown here.
	 */
	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io.count))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	/*
	 * If any eopnotsupp bit was set and this was a barrier request, clear
	 * the barrier flag and issue the whole request again.
	 * NOTE(review): eopnotsupp_bits is set outside this block, presumably
	 * when a barrier is not supported by a device; confirm.
	 */
	if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
		rw &= ~(1 << BIO_RW_BARRIER);
		goto retry;
	}

	if (error_bits)
		*error_bits = io.error_bits;

	return io.error_bits ? -EIO : 0;
}


async_io()函数如下:

/*
 * Asynchronous path of dm_io(): allocate a tracking object from the
 * client's mempool, record the completion callback and its context in it,
 * and dispatch the I/O without waiting.
 *
 * Dm-io can read from one io_region or write to one or more io_regions, so
 * a multi-region request that is not a write is rejected: a warning is
 * raised, fn is invoked with error value 1, and -EIO is returned.
 * Otherwise returns 0 once the I/O has been dispatched.
 */
static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct dm_io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	/* Wraps the request so it can be dispatched and completed later. */
	struct io *job;
	int multi_region_non_write =
		num_regions > 1 && (rw & RW_MASK) != WRITE;

	if (multi_region_non_write) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	job = mempool_alloc(client->pool, GFP_NOIO);

	/* Completion is reported through the callback, not to a sleeper. */
	job->callback = fn;
	job->context = context;
	job->sleeper = NULL;
	job->client = client;

	job->error_bits = 0;
	job->eopnotsupp_bits = 0;
	atomic_set(&job->count, 1); /* see dispatch_io() */

	dispatch_io(rw, num_regions, where, dp, job, 0);
	return 0;
}








Logo

更多推荐