#include #include #define cudaASSERT(ans) \ { \ cudaAssert((ans), __FILE__, __FUNCTION__, __LINE__); \ } typedef struct { float hs; // space bandwidth float hc; // color bandwidth int iterations; // number of iterations // device parameters size_t bsize; // threads per block (x and y dimensions) // gamma correction float exponent; // image dimensions int rows; int cols; } MSdata; // informs the user when a CUDA error occurs (optionally, also stops the program execution) void cudaAssert(cudaError_t code, const char *file, const char *fn, int line, bool abort=false) { if( cudaSuccess!=code ) { fprintf(stderr,"[%s() @ %s:%d] %s\n",fn,file,line,cudaGetErrorString(code)); if( abort ) { exit(code); } } } // CUDA kernel for the mean shift algorithm for grayscale images __global__ void cudaMeanShift1D(unsigned char *U, unsigned char *V, const MSdata data) { // compute pixel coordinates const int x=blockDim.x*blockIdx.x+threadIdx.x; const int y=blockDim.y*blockIdx.y+threadIdx.y; // square space and color "bandwidths" const int hs=data.hs*data.hs; const int hc=data.hc*data.hc; #define e_U(i,j) U[(j)*data.cols+(i)] #define e_V(i,j) V[(j)*data.cols+(i)] if( data.cols>x && data.rows>y ) { float I=0.0f,W=0.0f,w; // compute the neighborhood size of the pixel (x,y) const int dx_min=(data.hs>x)?x:data.hs; const int dy_min=(data.hs>y)?y:data.hs; const int dx_max=((data.hs+x)>=data.cols)?(data.cols-x-1):data.hs; const int dy_max=((data.hs+y)>=data.rows)?(data.rows-y-1):data.hs; // for each neighbor (i,j) of the pixel (x,y), for( int dx=-dx_min; dx_max>=dx; dx++) { const int i=x+dx; for( int dy=-dy_min; dy_max>=dy; dy++ ) { const int j=y+dy; // compute the kernel value at -||(X-X_{i})/h||^2 const float dI=e_U(i,j)-e_V(x,y); if( hs>=(dx*dx+dy*dy) && hc>=(dI*dI) ) { w=1.0f;//__expf(-(dx*dx+dy*dy)/hs-dI*dI/hc); W+=w; I+=w*e_U(i,j); } } } // compute the new intensity value e_V(x,y)=round(I/W); } #undef e_U #undef e_V } // CUDA kernel for gamma correction __global__ void cudaGammaCorrection(unsigned char *U, const MSdata data) { // compute pixel coordinates const int x=blockDim.x*blockIdx.x+threadIdx.x; const int y=blockDim.y*blockIdx.y+threadIdx.y; #define e_U(i,j) U[(j)*data.cols+(i)] if( data.cols>x && data.rows>y ) { float value=0.00392156862f*e_U(x,y); value=255.0f*powf(value,data.exponent); e_U(x,y)=round(value); } #undef e_U } int main(int argc, char **argv) { cv::Mat img=cv::imread("../prospettiva.jpg"); cv::cvtColor(img,img,CV_BGR2GRAY); MSdata data; data.bsize=32; data.iterations=50; data.hc=10; data.hs=3; data.cols=img.cols; data.rows=img.rows; data.exponent=0.85f; // device arrays unsigned char *d_U,*d_V; // memory allocation on device size_t bytes=data.rows*data.cols*sizeof(unsigned char); cudaASSERT( cudaMalloc((void**)&d_U,bytes) ); cudaASSERT( cudaMalloc((void**)&d_V,bytes) ); // grid and blocks geometry dim3 grid(1,1,1); dim3 threads(data.bsize,data.bsize,1); grid.x=(data.cols/data.bsize)+((data.cols%data.bsize)?1:0); grid.y=(data.rows/data.bsize)+((data.rows%data.bsize)?1:0); // device arrays initialization cudaASSERT( cudaMemcpy(d_U,img.data,bytes,cudaMemcpyHostToDevice) ); cudaASSERT( cudaMemcpy(d_V,img.data,bytes,cudaMemcpyHostToDevice) ); // mean shift iterations on the GPU for( int k=0; data.iterations>k; k++) { cudaMeanShift1D<<>>(d_U,d_V,data); } cudaGammaCorrection<<>>(d_V,data); // retrieve result from device cudaASSERT( cudaMemcpy(img.data,d_V,bytes,cudaMemcpyDeviceToHost) ); // free device memory resources cudaASSERT( cudaFree(d_U) ); cudaASSERT( cudaFree(d_V) ); cv::Mat bw; cv::threshold(img,bw, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU); cv::imwrite("../modified.jpg",bw); return 0; }