forked from avana/sagoma
159 lines
3.9 KiB
Text
159 lines
3.9 KiB
Text
#include<cuda.h>
|
|
#include<opencv2/opencv.hpp>
|
|
|
|
#define cudaASSERT(ans) \
|
|
{ \
|
|
cudaAssert((ans), __FILE__, __FUNCTION__, __LINE__); \
|
|
}
|
|
|
|
typedef struct
|
|
{
|
|
float hs; // space bandwidth
|
|
float hc; // color bandwidth
|
|
|
|
int iterations; // number of iterations
|
|
|
|
// device parameters
|
|
size_t bsize; // threads per block (x and y dimensions)
|
|
|
|
// gamma correction
|
|
float exponent;
|
|
|
|
// image dimensions
|
|
int rows;
|
|
int cols;
|
|
} MSdata;
|
|
|
|
|
|
// informs the user when a CUDA error occurs (optionally, also stops the program execution)
|
|
void cudaAssert(cudaError_t code, const char *file, const char *fn, int line, bool abort=false)
|
|
{
|
|
if( cudaSuccess!=code )
|
|
{
|
|
fprintf(stderr,"[%s() @ %s:%d] %s\n",fn,file,line,cudaGetErrorString(code));
|
|
if( abort )
|
|
{
|
|
exit(code);
|
|
}
|
|
}
|
|
}
|
|
|
|
// CUDA kernel for the mean shift algorithm for grayscale images
|
|
__global__ void cudaMeanShift1D(unsigned char *U, unsigned char *V, const MSdata data)
|
|
{
|
|
// compute pixel coordinates
|
|
const int x=blockDim.x*blockIdx.x+threadIdx.x;
|
|
const int y=blockDim.y*blockIdx.y+threadIdx.y;
|
|
|
|
// square space and color "bandwidths"
|
|
const int hs=data.hs*data.hs;
|
|
const int hc=data.hc*data.hc;
|
|
|
|
#define e_U(i,j) U[(j)*data.cols+(i)]
|
|
#define e_V(i,j) V[(j)*data.cols+(i)]
|
|
if( data.cols>x && data.rows>y )
|
|
{
|
|
float I=0.0f,W=0.0f,w;
|
|
|
|
// compute the neighborhood size of the pixel (x,y)
|
|
const int dx_min=(data.hs>x)?x:data.hs;
|
|
const int dy_min=(data.hs>y)?y:data.hs;
|
|
const int dx_max=((data.hs+x)>=data.cols)?(data.cols-x-1):data.hs;
|
|
const int dy_max=((data.hs+y)>=data.rows)?(data.rows-y-1):data.hs;
|
|
|
|
// for each neighbor (i,j) of the pixel (x,y),
|
|
for( int dx=-dx_min; dx_max>=dx; dx++)
|
|
{
|
|
const int i=x+dx;
|
|
for( int dy=-dy_min; dy_max>=dy; dy++ )
|
|
{
|
|
const int j=y+dy;
|
|
|
|
// compute the kernel value at -||(X-X_{i})/h||^2
|
|
const float dI=e_U(i,j)-e_V(x,y);
|
|
if( hs>=(dx*dx+dy*dy) && hc>=(dI*dI) )
|
|
{
|
|
w=1.0f;//__expf(-(dx*dx+dy*dy)/hs-dI*dI/hc);
|
|
W+=w;
|
|
I+=w*e_U(i,j);
|
|
}
|
|
}
|
|
}
|
|
|
|
// compute the new intensity value
|
|
e_V(x,y)=round(I/W);
|
|
}
|
|
#undef e_U
|
|
#undef e_V
|
|
}
|
|
|
|
// CUDA kernel for gamma correction
|
|
__global__ void cudaGammaCorrection(unsigned char *U, const MSdata data)
|
|
{
|
|
// compute pixel coordinates
|
|
const int x=blockDim.x*blockIdx.x+threadIdx.x;
|
|
const int y=blockDim.y*blockIdx.y+threadIdx.y;
|
|
|
|
#define e_U(i,j) U[(j)*data.cols+(i)]
|
|
if( data.cols>x && data.rows>y )
|
|
{
|
|
float value=0.00392156862f*e_U(x,y);
|
|
value=255.0f*powf(value,data.exponent);
|
|
e_U(x,y)=round(value);
|
|
}
|
|
#undef e_U
|
|
}
|
|
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
cv::Mat img=cv::imread("../prospettiva.jpg");
|
|
cv::cvtColor(img,img,CV_BGR2GRAY);
|
|
|
|
MSdata data;
|
|
data.bsize=32;
|
|
data.iterations=50;
|
|
data.hc=10;
|
|
data.hs=3;
|
|
data.cols=img.cols;
|
|
data.rows=img.rows;
|
|
data.exponent=0.85f;
|
|
|
|
// device arrays
|
|
unsigned char *d_U,*d_V;
|
|
|
|
// memory allocation on device
|
|
size_t bytes=data.rows*data.cols*sizeof(unsigned char);
|
|
cudaASSERT( cudaMalloc((void**)&d_U,bytes) );
|
|
cudaASSERT( cudaMalloc((void**)&d_V,bytes) );
|
|
|
|
// grid and blocks geometry
|
|
dim3 grid(1,1,1);
|
|
dim3 threads(data.bsize,data.bsize,1);
|
|
grid.x=(data.cols/data.bsize)+((data.cols%data.bsize)?1:0);
|
|
grid.y=(data.rows/data.bsize)+((data.rows%data.bsize)?1:0);
|
|
|
|
// device arrays initialization
|
|
cudaASSERT( cudaMemcpy(d_U,img.data,bytes,cudaMemcpyHostToDevice) );
|
|
cudaASSERT( cudaMemcpy(d_V,img.data,bytes,cudaMemcpyHostToDevice) );
|
|
|
|
// mean shift iterations on the GPU
|
|
for( int k=0; data.iterations>k; k++)
|
|
{
|
|
cudaMeanShift1D<<<grid,threads>>>(d_U,d_V,data);
|
|
}
|
|
cudaGammaCorrection<<<grid,threads>>>(d_V,data);
|
|
|
|
// retrieve result from device
|
|
cudaASSERT( cudaMemcpy(img.data,d_V,bytes,cudaMemcpyDeviceToHost) );
|
|
|
|
// free device memory resources
|
|
cudaASSERT( cudaFree(d_U) );
|
|
cudaASSERT( cudaFree(d_V) );
|
|
|
|
cv::Mat bw;
|
|
cv::threshold(img,bw, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
|
|
|
|
cv::imwrite("../modified.jpg",bw);
|
|
return 0;
|
|
}
|