bs_dsp.cu 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. #include<cuda.h>
  2. #include<opencv2/opencv.hpp>
  3. #define cudaASSERT(ans) \
  4. { \
  5. cudaAssert((ans), __FILE__, __FUNCTION__, __LINE__); \
  6. }
  7. typedef struct
  8. {
  9. float hs; // space bandwidth
  10. float hc; // color bandwidth
  11. int iterations; // number of iterations
  12. // device parameters
  13. size_t bsize; // threads per block (x and y dimensions)
  14. // gamma correction
  15. float exponent;
  16. // image dimensions
  17. int rows;
  18. int cols;
  19. } MSdata;
  20. // informs the user when a CUDA error occurs (optionally, also stops the program execution)
  21. void cudaAssert(cudaError_t code, const char *file, const char *fn, int line, bool abort=false)
  22. {
  23. if( cudaSuccess!=code )
  24. {
  25. fprintf(stderr,"[%s() @ %s:%d] %s\n",fn,file,line,cudaGetErrorString(code));
  26. if( abort )
  27. {
  28. exit(code);
  29. }
  30. }
  31. }
  32. // CUDA kernel for the mean shift algorithm for grayscale images
  33. __global__ void cudaMeanShift1D(unsigned char *U, unsigned char *V, const MSdata data)
  34. {
  35. // compute pixel coordinates
  36. const int x=blockDim.x*blockIdx.x+threadIdx.x;
  37. const int y=blockDim.y*blockIdx.y+threadIdx.y;
  38. // square space and color "bandwidths"
  39. const int hs=data.hs*data.hs;
  40. const int hc=data.hc*data.hc;
  41. #define e_U(i,j) U[(j)*data.cols+(i)]
  42. #define e_V(i,j) V[(j)*data.cols+(i)]
  43. if( data.cols>x && data.rows>y )
  44. {
  45. float I=0.0f,W=0.0f,w;
  46. // compute the neighborhood size of the pixel (x,y)
  47. const int dx_min=(data.hs>x)?x:data.hs;
  48. const int dy_min=(data.hs>y)?y:data.hs;
  49. const int dx_max=((data.hs+x)>=data.cols)?(data.cols-x-1):data.hs;
  50. const int dy_max=((data.hs+y)>=data.rows)?(data.rows-y-1):data.hs;
  51. // for each neighbor (i,j) of the pixel (x,y),
  52. for( int dx=-dx_min; dx_max>=dx; dx++)
  53. {
  54. const int i=x+dx;
  55. for( int dy=-dy_min; dy_max>=dy; dy++ )
  56. {
  57. const int j=y+dy;
  58. // compute the kernel value at -||(X-X_{i})/h||^2
  59. const float dI=e_U(i,j)-e_V(x,y);
  60. if( hs>=(dx*dx+dy*dy) && hc>=(dI*dI) )
  61. {
  62. w=1.0f;//__expf(-(dx*dx+dy*dy)/hs-dI*dI/hc);
  63. W+=w;
  64. I+=w*e_U(i,j);
  65. }
  66. }
  67. }
  68. // compute the new intensity value
  69. e_V(x,y)=round(I/W);
  70. }
  71. #undef e_U
  72. #undef e_V
  73. }
  74. // CUDA kernel for gamma correction
  75. __global__ void cudaGammaCorrection(unsigned char *U, const MSdata data)
  76. {
  77. // compute pixel coordinates
  78. const int x=blockDim.x*blockIdx.x+threadIdx.x;
  79. const int y=blockDim.y*blockIdx.y+threadIdx.y;
  80. #define e_U(i,j) U[(j)*data.cols+(i)]
  81. if( data.cols>x && data.rows>y )
  82. {
  83. float value=0.00392156862f*e_U(x,y);
  84. value=255.0f*powf(value,data.exponent);
  85. e_U(x,y)=round(value);
  86. }
  87. #undef e_U
  88. }
  89. int main(int argc, char **argv)
  90. {
  91. cv::Mat img=cv::imread("../prospettiva.jpg");
  92. cv::cvtColor(img,img,CV_BGR2GRAY);
  93. MSdata data;
  94. data.bsize=32;
  95. data.iterations=50;
  96. data.hc=10;
  97. data.hs=3;
  98. data.cols=img.cols;
  99. data.rows=img.rows;
  100. data.exponent=0.85f;
  101. // device arrays
  102. unsigned char *d_U,*d_V;
  103. // memory allocation on device
  104. size_t bytes=data.rows*data.cols*sizeof(unsigned char);
  105. cudaASSERT( cudaMalloc((void**)&d_U,bytes) );
  106. cudaASSERT( cudaMalloc((void**)&d_V,bytes) );
  107. // grid and blocks geometry
  108. dim3 grid(1,1,1);
  109. dim3 threads(data.bsize,data.bsize,1);
  110. grid.x=(data.cols/data.bsize)+((data.cols%data.bsize)?1:0);
  111. grid.y=(data.rows/data.bsize)+((data.rows%data.bsize)?1:0);
  112. // device arrays initialization
  113. cudaASSERT( cudaMemcpy(d_U,img.data,bytes,cudaMemcpyHostToDevice) );
  114. cudaASSERT( cudaMemcpy(d_V,img.data,bytes,cudaMemcpyHostToDevice) );
  115. // mean shift iterations on the GPU
  116. for( int k=0; data.iterations>k; k++)
  117. {
  118. cudaMeanShift1D<<<grid,threads>>>(d_U,d_V,data);
  119. }
  120. cudaGammaCorrection<<<grid,threads>>>(d_V,data);
  121. // retrieve result from device
  122. cudaASSERT( cudaMemcpy(img.data,d_V,bytes,cudaMemcpyDeviceToHost) );
  123. // free device memory resources
  124. cudaASSERT( cudaFree(d_U) );
  125. cudaASSERT( cudaFree(d_V) );
  126. cv::Mat bw;
  127. cv::threshold(img,bw, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
  128. cv::imwrite("../modified.jpg",bw);
  129. return 0;
  130. }