Hi,
I wrote a program that performs a convolution in OpenCL; however, it only works about 70% of the time.
#include"SDL2/SDL.h"
#include"SDL2/SDL_image.h"
#include"CL_Interface.h"
/**
 * Prints a square filter to stdout, one filter row per output line.
 *
 * Every coefficient is printed with two decimals followed by a single space,
 * and the whole matrix is preceded by a "filter:" header line.
 *
 * @param filter      Coefficients in row-major order; must hold at least
 *                    filter_size * filter_size values.
 * @param filter_size Number of rows (and columns) of the filter.
 */
void printFilter(const float* filter, int filter_size)
{
    printf("filter:\n");
    for (int row = 0; row < filter_size; ++row)
    {
        for (int col = 0; col < filter_size; ++col)
        {
            printf("%.2f ", filter[row * filter_size + col]);
        }
        printf("\n");
    }
}
int main(int argc, char** argv)
{
bool quit= false;
int filter_size=4;
SDL_Event event;
SDL_Init(SDL_INIT_VIDEO);
//ATTENTION: 24bit RGB is not supported by OpenCL!
SDL_Surface* image= IMG_Load("out.png");
int image_height=image->h;
int image_width=image->w;
int bpp = (int) image->format->BytesPerPixel;
std::cout << "Bpp " << bpp << "Size "<< image->w << image->h << std::endl;
SDL_Window* window= SDL_CreateWindow("Convolution", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 1024, 720, 0);
SDL_Renderer* renderer= SDL_CreateRenderer(window, -1, 0);
SDL_Texture* texture= SDL_CreateTextureFromSurface(renderer, image);
printf("Initializing OpenCL...
");
//opencl
CL_Interface* ocl= new CL_Interface();
ocl->buildProgram("1_test.cl");
Kernel* kernel= new Kernel("convolute", ocl->program);
//input image (OpenCL have few supported formats!!! No 24bit RGB!)
const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8);
cl::Image2D in(ocl->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, format, image_width, image_height, 0, image->pixels);
//float* filter= (float*)malloc(sizeof(float)*filter_size*filter_size);
float filter[]=
{ -1, 0, 1,0,
-2, 0, 2,0,
-1, 0, 1,0,
0, 0, 0,0};
printFilter(filter, 4);
cl::Buffer filter_buffer= cl::Buffer(ocl->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, filter_size*filter_size*sizeof(float), filter);
//cl::Image2D out(ocl->context, CL_MEM_WRITE_ONLY, format, image_width, image_height, 0, data2);
cl::Image2D out(ocl->context, CL_MEM_WRITE_ONLY, format, image_width, image_height, 0, NULL);
//cl::Image2D out(ocl->context, CL_MEM_WRITE_ONLY, format, image_width, image_height, 0, image2->pixels);
kernel->kernel.setArg(0, in);
kernel->kernel.setArg(1, out);
kernel->kernel.setArg(2, filter_buffer);
kernel->kernel.setArg(3, filter_size/2);
ocl->queue.enqueueNDRangeKernel(kernel->kernel, cl::NullRange, cl::NDRange(image_width, image_height), cl::NullRange);
ocl->queue.finish();
//start and end coordinates for reading our image
cl::size_t<3> origin;
cl::size_t<3> size;
origin[0] = 0;
origin[1] = 0;
origin[2] = 0;
size[0] = image_width;
size[1] = image_height;
size[2] = 1;
while(!quit)
{
ocl->queue.enqueueReadImage(out, CL_TRUE, origin, size, 0, 0, image->pixels);
//SDL_Texture* texture2= SDL_CreateTextureFromSurface(renderer, image);
SDL_UpdateTexture(texture, NULL, image->pixels, image->pitch);
SDL_WaitEvent(&event);
switch(event.type)
{
case SDL_QUIT:
{
quit=true;
}
break;
}
//SDL_RenderCopy(renderer, texture2, NULL, NULL);
SDL_RenderCopy(renderer, texture, NULL, NULL);
SDL_RenderPresent(renderer);
}
SDL_DestroyTexture(texture);
SDL_FreeSurface(image);
SDL_DestroyRenderer(renderer);
SDL_DestroyWindow(window);
SDL_Quit();
return 0;
}
// read_imageui only supports nearest filtering; with CLK_FILTER_LINEAR the
// values it returns are undefined. Clamp-to-edge lets the filter window
// overlap the image border safely.
constant sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

/*
 * Convolves an 8-bit-per-channel image with a square filter.
 *
 * Launch with a 2D range of one work-item per output pixel.
 *
 * in               Source image, read with read_imageui.
 * out              Destination image; every pixel inside its bounds is written.
 * filter           Row-major coefficients. The kernel reads
 *                  (2 * half_filter_size + 1)^2 values, so the buffer must
 *                  hold at least that many floats.
 * half_filter_size Radius of the filter window (1 for a 3x3 filter).
 *
 * The same arithmetic is applied to all four channels: the weighted sum of
 * the channel values (scaled to 0..1) is clamped to 0..1, scaled back to
 * 0..255 and biased by +128.
 */
void kernel convolute(__read_only image2d_t in, __write_only image2d_t out, __constant float* filter, int half_filter_size)
{
    const int center_x = get_global_id(0);
    const int center_y = get_global_id(1);

    // Work-items outside the destination have nothing to write.
    if (center_x >= get_image_width(out) || center_y >= get_image_height(out))
    {
        return;
    }

    // Border pixels are filtered too (the sampler clamps out-of-range reads
    // to the edge), so no pixel of the output is left unwritten.
    float4 sum = (float4)(0.0f);
    int fIndex = 0;
    for (int r = -half_filter_size; r <= half_filter_size; r++)
    {
        // r selects the filter row, which corresponds to the image y offset.
        const int y = center_y + r;
        for (int c = -half_filter_size; c <= half_filter_size; c++)
        {
            const int x = center_x + c;
            const float4 pixel = convert_float4(read_imageui(in, smp, (int2)(x, y))) / 255.0f;
            //printf("f4 %2.2v4f\n", pixel);
            sum += pixel * filter[fIndex];
            fIndex++;
        }
    }

    sum = clamp(sum, 0.0f, 1.0f);
    const uint4 uint_sum = convert_uint4_sat_rte(sum * 255.0f) + (uint4)(128);
    write_imageui(out, (int2)(center_x, center_y), uint_sum);
}
Any ideas why I sometimes get a nearly blank screen and sometimes the filtered image?