CUDA:纹理金字塔
背景:纹理内存是CUDA里非常好用的一个工具,能够极大地提高图片处理的速度。但是通常的纹理内存,只能处理当前尺度的数据,如果想要处理多尺度的信息,就会特别复杂。在本篇博客中,将主要针对纹理金字塔(Mipmap)进行实验,了解一下其用法。这里引用Unity中关于Mipmap的示意图说明一下Mipmap是啥,其实就是由同一张图像逐级缩小得到的一系列图像所组成的金字塔。另外,在阅读本篇博客时,应该对纹理参考和纹理对象有一定的了解,建议参考博主之前的文章。
·
背景
纹理内存是CUDA里非常好用的一个工具,能够极大地提高图片处理的速度。但是通常的纹理内存,只能处理当前尺度的数据,如果想要处理多尺度的信息,就会特别复杂。在本篇博客中,将主要针对纹理金字塔(Mipmap)进行实验,了解一下其用法。
这里引用Unity中关于Mipmap的示意图说明一下Mipmap是啥:其实就是由同一张图像逐级缩小(每一级的长宽约为上一级的一半)得到的一系列图像所组成的金字塔。
另外,在阅读本篇博客时,应该对纹理参考和纹理对象有一定的了解,建议参考博主之前的文章。
核心API
cudaMallocMipmappedArray ( cudaMipmappedArray_t* mipmappedArray, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int numLevels, unsigned int flags = 0 )
mipmappedArray: 需要申请的mipmap数据结构
desc: 此处与纹理参考一致,表明申请数据块的基本数据结构
extent: 设置长宽
numLevels: 设置需要申请的mipmap的层数(下文记为max_scale),该值会被限制在[1, 1+floor(log2(max(width, height, depth)))]范围内
cudaGetMipmappedArrayLevel ( cudaArray_t* levelArray, cudaMipmappedArray_const_t mipmappedArray, unsigned int level )
levelArray: 获取得到的某个尺度下内存块
mipmappedArray:输入的mipmap数据结构
level:需要获取的尺度,取值范围是[0,max_scale-1]
cudaFreeMipmappedArray ( cudaMipmappedArray_t mipmappedArray )
mipmappedArray:需要释放的纹理金字塔
tex2DLod(cudaTextureObject_t texObj, float x, float y, float level)
texObj: 纹理对象
x,y: 坐标值
level: 需要获取像素值的尺度,可以是小数(例如2.5);当mipmapFilterMode为cudaFilterModeLinear时,会在相邻的两个尺度之间做线性插值
代码
//C
#include <stdio.h>
#include <time.h>
//openCV
#include "opencv2/opencv.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
using namespace cv;
//CUDA
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Multi-scale sampling test kernel.
//
// Each thread handles one output pixel (x, y) and writes to
// out_image[y * width + x] the value fetched from the mipmapped texture
// `texObj` at a fixed level of detail of 2.5.
//
// Parameters:
//   out_image - device buffer holding at least width * height floats.
//   width     - output width in pixels.
//   height    - output height in pixels.
//   texObj    - texture object sampled with tex2DLod; the coordinates passed
//               are divided by width/height, so the texture is expected to
//               use normalized coordinates (main() creates it that way).
//
// Launch layout: 2D grid of 2D blocks covering at least width x height
// threads; threads outside the image return without writing.
__global__ void convolution_kernel(
    float* out_image,
    int width,int height,
    cudaTextureObject_t texObj
    )
{
    const int x = threadIdx.x + blockIdx.x*blockDim.x;
    const int y = threadIdx.y + blockIdx.y*blockDim.y;

    // Guard first so that no index is formed for out-of-range threads.
    if (x < 0 || y < 0 || x >= width || y >= height)
    {
        return;
    }

    // Level of detail to sample; a fractional value selects a position
    // between level 2 and level 3.
    const float lod = 2.5f;

    // Sample at the texel centre: with normalized coordinates, the centre of
    // pixel i lies at (i + 0.5) / N. Sampling at i / N would land on the
    // texel corner and, under linear filtering, blend in the previous texel
    // (a half-pixel shift of the result).
    const float u = (x + 0.5f) / width;
    const float v = (y + 0.5f) / height;

    // size_t arithmetic keeps the flat index from overflowing int on very
    // large images.
    const size_t pos = (size_t)y * (size_t)width + (size_t)x;
    out_image[pos] = tex2DLod<float>(texObj, u, v, lod);
}
// Pyramid level generator: each output pixel is the plain average of four
// texture samples.
//
// Each thread produces one pixel (x, y) of the output surface. It samples
// `mipInput` at the four positions (x, y), (x+1, y), (x+1, y+1), (x, y+1),
// each divided by (imageW, imageH), averages them and writes the result to
// `mipOutput`.
//
// Parameters:
//   mipOutput - surface object receiving the generated level (float texels;
//               surf2Dwrite takes the x offset in bytes).
//   mipInput  - texture object of the source level; coordinates passed are
//               x / imageW and y / imageH, so it is expected to use
//               normalized coordinates (main() creates it that way).
//   imageW    - width in pixels of the region written to mipOutput.
//   imageH    - height in pixels of the region written to mipOutput.
//
// Launch layout: 2D grid of 2D blocks covering at least imageW x imageH
// threads; threads outside that range do nothing.
__global__ void d_mipmap(cudaSurfaceObject_t mipOutput, cudaTextureObject_t mipInput, unsigned int imageW, unsigned int imageH)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= imageW || y >= imageH)
    {
        return;
    }

    // Size of one output pixel in normalized texture coordinates.
    const float px = 1.0f / float(imageW);
    const float py = 1.0f / float(imageH);

    // Take the average of 4 samples placed at the corners of the output pixel.
    const float sum =
        tex2D<float>(mipInput, (x + 0) * px, (y + 0) * py) +
        tex2D<float>(mipInput, (x + 1) * px, (y + 0) * py) +
        tex2D<float>(mipInput, (x + 1) * px, (y + 1) * py) +
        tex2D<float>(mipInput, (x + 0) * px, (y + 1) * py);
    const float color = sum * 0.25f;

    surf2Dwrite(color, mipOutput, x * sizeof(float), y);
}
// Prints `message` followed by the CUDA error description to stderr and
// terminates the process when `status` is not cudaSuccess.
static void exit_on_cuda_error(cudaError_t status, const char* message)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "%s: %s\n", message, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Demo entry point:
//   1. reads "3333.jpg" as grayscale and converts it to float,
//   2. uploads it as level 0 of a mipmapped CUDA array,
//   3. generates the remaining levels on the GPU with d_mipmap,
//   4. samples the pyramid with convolution_kernel,
//   5. writes the result to "cuda_texture.jpg".
// Returns 0 on success, -1 if the image cannot be read/written or the device
// cannot be selected; any other CUDA failure exits with EXIT_FAILURE.
int main()
{
    // Read the input picture.
    Mat in_image = imread("3333.jpg",CV_LOAD_IMAGE_GRAYSCALE);
    if (in_image.empty())
    {
        // Without this check an unreadable file would lead to zero-sized
        // allocations and copies further down.
        fprintf(stderr, "imread failed: could not read 3333.jpg\n");
        return -1;
    }
    Mat in_image_float;
    in_image.convertTo(in_image_float, CV_32FC1);
    Mat out_image = Mat(in_image.size(), CV_32F);

    // Select the CUDA device.
    cudaError_t cuda_status = cudaSetDevice(0);
    if(cuda_status != cudaSuccess)
    {
        fprintf(stderr,"cudaSetDevice failed! Do you have a CUDA-Capable GPU installed?");
        return -1;
    }

    // Number of pixels, computed in size_t so that large images do not
    // overflow int.
    const size_t image_size = (size_t)in_image.cols * (size_t)in_image.rows;

    // Device buffer for the result image.
    float* dev_out_image = NULL;
    exit_on_cuda_error(cudaMalloc((void**)&dev_out_image, sizeof(float) * image_size),
                       "cudaMalloc Failed");

    // Create the mipmapped array.
    const int max_scale = 4; // requested number of pyramid levels
    cudaExtent size;
    size.width = in_image.cols;
    size.height = in_image.rows;
    size.depth = 0; // 2D texture, so no depth

    // The runtime clamps the level count to 1 + floor(log2(max(w, h)));
    // compute the same bound here so that small images never ask for a level
    // that does not exist.
    int num_levels = 1;
    for (size_t longest = std::max(size.width, size.height);
         longest > 1 && num_levels < max_scale;
         longest /= 2)
    {
        ++num_levels;
    }

    cudaMipmappedArray_t mipmapArray;
    cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
    // cudaArraySurfaceLoadStore: the levels are written through surface
    // objects when the pyramid is generated below.
    exit_on_cuda_error(cudaMallocMipmappedArray(&mipmapArray, &desc, size, num_levels, cudaArraySurfaceLoadStore),
                       "cudaMallocMipmappedArray Failed");

    // Fill level 0: fetch its array handle, then copy the host image into it.
    // The 2D copy honours the Mat row stride instead of assuming a packed
    // buffer.
    cudaArray_t dev_images;
    exit_on_cuda_error(cudaGetMipmappedArrayLevel(&dev_images, mipmapArray, 0),
                       "cudaGetMipmappedArrayLevel Failed");
    exit_on_cuda_error(cudaMemcpy2DToArray(dev_images, 0, 0,
                                           in_image_float.data, in_image_float.step[0],
                                           (size_t)in_image.cols * sizeof(float), (size_t)in_image.rows,
                                           cudaMemcpyHostToDevice),
                       "cudaMemcpy2DToArray Failed");

    // Texture settings used to read a single source level; identical for
    // every iteration, so they are set up once.
    cudaTextureDesc levelTexDesc;
    memset(&levelTexDesc, 0, sizeof(cudaTextureDesc));
    levelTexDesc.normalizedCoords = 1;
    levelTexDesc.filterMode = cudaFilterModeLinear;
    levelTexDesc.addressMode[0] = cudaAddressModeClamp;
    levelTexDesc.addressMode[1] = cudaAddressModeClamp;
    levelTexDesc.addressMode[2] = cudaAddressModeClamp;
    levelTexDesc.readMode = cudaReadModeElementType;

    // Starting from level 0, generate every following level from the one
    // before it. The last source level is num_levels - 2, because the
    // destination is always level + 1.
    size_t width = in_image.cols;
    size_t height = in_image.rows;
    for (int level = 0; level + 1 < num_levels; level++)
    {
        // Size of the level being generated; never smaller than 1 pixel.
        width = std::max<size_t>(width / 2, 1);
        height = std::max<size_t>(height / 2, 1);

        // Source: the previous level.
        cudaArray_t levelFrom;
        exit_on_cuda_error(cudaGetMipmappedArrayLevel(&levelFrom, mipmapArray, level),
                           "cudaGetMipmappedArrayLevel Failed");
        // Destination: the level being generated.
        cudaArray_t levelTo;
        exit_on_cuda_error(cudaGetMipmappedArrayLevel(&levelTo, mipmapArray, level + 1),
                           "cudaGetMipmappedArrayLevel Failed");

        // Texture object used to read the source level.
        cudaTextureObject_t texInput;
        cudaResourceDesc texRes;
        memset(&texRes, 0, sizeof(cudaResourceDesc));
        texRes.resType = cudaResourceTypeArray;
        texRes.res.array.array = levelFrom;
        exit_on_cuda_error(cudaCreateTextureObject(&texInput, &texRes, &levelTexDesc, NULL),
                           "cudaCreateTextureObject Failed");

        // Surface object used to write the destination level.
        cudaSurfaceObject_t surfOutput;
        cudaResourceDesc surfRes;
        memset(&surfRes, 0, sizeof(cudaResourceDesc));
        surfRes.resType = cudaResourceTypeArray;
        surfRes.res.array.array = levelTo;
        exit_on_cuda_error(cudaCreateSurfaceObject(&surfOutput, &surfRes),
                           "cudaCreateSurfaceObject Failed");

        // Run d_mipmap to fill the destination level.
        dim3 blockSize(16, 16);
        dim3 gridSize(((unsigned int)width + blockSize.x - 1) / blockSize.x,
                      ((unsigned int)height + blockSize.y - 1) / blockSize.y);
        d_mipmap<<<gridSize, blockSize>>>(surfOutput, texInput, (unsigned int)width, (unsigned int)height);
        exit_on_cuda_error(cudaGetLastError(), "d_mipmap launch Failed");
        // Wait for the kernel before destroying the objects it uses.
        exit_on_cuda_error(cudaDeviceSynchronize(), "d_mipmap execution Failed");

        // Destroy the per-level objects.
        exit_on_cuda_error(cudaDestroySurfaceObject(surfOutput), "cudaDestroySurfaceObject Failed");
        exit_on_cuda_error(cudaDestroyTextureObject(texInput), "cudaDestroyTextureObject Failed");
    }

    // Create the texture object over the whole mipmap.
    cudaTextureObject_t textureObject;
    cudaResourceDesc resDescr;
    memset(&resDescr, 0, sizeof(cudaResourceDesc));
    resDescr.resType = cudaResourceTypeMipmappedArray;
    resDescr.res.mipmap.mipmap = mipmapArray; // the mipmapped array built above
    cudaTextureDesc mipTexDesc;
    memset(&mipTexDesc, 0, sizeof(cudaTextureDesc));
    mipTexDesc.normalizedCoords = 1;
    mipTexDesc.filterMode = cudaFilterModeLinear;
    mipTexDesc.mipmapFilterMode = cudaFilterModeLinear;
    mipTexDesc.addressMode[0] = cudaAddressModeClamp;
    mipTexDesc.addressMode[1] = cudaAddressModeClamp;
    mipTexDesc.addressMode[2] = cudaAddressModeClamp;
    // Requests beyond the last allocated level are clamped to it.
    mipTexDesc.maxMipmapLevelClamp = (float)(num_levels - 1);
    mipTexDesc.readMode = cudaReadModeElementType;
    exit_on_cuda_error(cudaCreateTextureObject(&textureObject, &resDescr, &mipTexDesc, NULL),
                       "cudaCreateTextureObject Failed");

    // Example use of the mipmap: one thread per output pixel.
    dim3 threads(16,16);
    dim3 grid((in_image.cols + threads.x - 1) / threads.x,
              (in_image.rows + threads.y - 1) / threads.y);
    convolution_kernel<<<grid,threads>>>(dev_out_image,in_image.cols,in_image.rows, textureObject);
    exit_on_cuda_error(cudaGetLastError(), "convolution_kernel launch Failed");

    // Copy the result back; this blocking copy also waits for the kernel.
    exit_on_cuda_error(cudaMemcpy((float*)out_image.data, dev_out_image, sizeof(float) * image_size, cudaMemcpyDeviceToHost),
                       "cudaMemcpy Failed");

    // Destroy the texture object.
    exit_on_cuda_error(cudaDestroyTextureObject(textureObject), "cudaDestroyTextureObject Failed");
    // Release device memory.
    exit_on_cuda_error(cudaFreeMipmappedArray(mipmapArray), "cudaFreeMipmappedArray Failed");
    exit_on_cuda_error(cudaFree(dev_out_image), "cudaFree Failed");

    // Convert to 8-bit and write the result picture.
    Mat abs_dst;
    convertScaleAbs( out_image, abs_dst );
    if (!imwrite("cuda_texture.jpg",abs_dst))
    {
        fprintf(stderr, "imwrite failed: could not write cuda_texture.jpg\n");
        return -1;
    }
    return 0;
}
结果
输入
输出
更多推荐
已为社区贡献3条内容
所有评论(0)