Working 70% of the time

Hi,

I wrote a program that performs a convolution in OpenCL; however, it only works about 70% of the time.


#include <cstdio>
#include <iostream>

#include"SDL2/SDL.h"
#include"SDL2/SDL_image.h"
#include"CL_Interface.h"

/**
 * Prints a square filter to stdout, one filter row per output line.
 *
 * @param filter       Row-major coefficients; must hold at least
 *                     filter_size * filter_size values.
 * @param filter_size  Number of rows (and columns) of the filter.
 */
void printFilter(const float* filter, int filter_size)
{
	printf("filter:\n");
	for (int row = 0; row < filter_size; ++row)
	{
		for (int col = 0; col < filter_size; ++col)
		{
			printf("%.2f ", filter[row * filter_size + col]);
		}
		printf("\n");
	}
}


int main(int argc, char** argv)
{
	bool quit= false;
	int filter_size=4;
	
	SDL_Event event;

	SDL_Init(SDL_INIT_VIDEO);

	//ATTENTION: 24bit RGB is not supported by OpenCL!	
	SDL_Surface* image= IMG_Load("out.png");
	int image_height=image->h;
	int image_width=image->w;
	int bpp = (int) image->format->BytesPerPixel;
	std::cout << "Bpp " << bpp << "Size "<< image->w << image->h << std::endl;


	SDL_Window* window= SDL_CreateWindow("Convolution", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 1024, 720, 0);

	SDL_Renderer* renderer= SDL_CreateRenderer(window, -1, 0);

	SDL_Texture* texture= SDL_CreateTextureFromSurface(renderer, image);

	printf("Initializing OpenCL...
");
	//opencl
	CL_Interface* ocl= new CL_Interface();
	ocl->buildProgram("1_test.cl");
	Kernel* kernel= new Kernel("convolute", ocl->program);
	
        //input image (OpenCL have few supported formats!!! No 24bit RGB!)
	const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8);

	cl::Image2D in(ocl->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, format, image_width, image_height, 0, image->pixels);

	//float* filter= (float*)malloc(sizeof(float)*filter_size*filter_size);
	float filter[]= 
	{ -1, 0, 1,0,
	  -2, 0, 2,0,
	  -1, 0, 1,0,
	  0, 0, 0,0};
	printFilter(filter, 4);
	cl::Buffer filter_buffer= cl::Buffer(ocl->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, filter_size*filter_size*sizeof(float), filter);

	//cl::Image2D out(ocl->context, CL_MEM_WRITE_ONLY, format, image_width, image_height, 0, data2);
	cl::Image2D out(ocl->context, CL_MEM_WRITE_ONLY, format, image_width, image_height, 0, NULL);
	//cl::Image2D out(ocl->context, CL_MEM_WRITE_ONLY, format, image_width, image_height, 0, image2->pixels);
	kernel->kernel.setArg(0, in);
	kernel->kernel.setArg(1, out);
	kernel->kernel.setArg(2, filter_buffer);
	kernel->kernel.setArg(3, filter_size/2);

	ocl->queue.enqueueNDRangeKernel(kernel->kernel, cl::NullRange, cl::NDRange(image_width, image_height), cl::NullRange);

	ocl->queue.finish();

	//start and end coordinates for reading our image
	cl::size_t<3> origin;
	cl::size_t<3> size;
	origin[0] = 0;
	origin[1] = 0;
	origin[2] = 0;
	size[0] = image_width;
	size[1] = image_height;

	size[2] = 1;


	while(!quit)
	{
		ocl->queue.enqueueReadImage(out, CL_TRUE, origin, size, 0, 0, image->pixels);
	
		//SDL_Texture* texture2= SDL_CreateTextureFromSurface(renderer, image);
		SDL_UpdateTexture(texture, NULL, image->pixels, image->pitch);

		SDL_WaitEvent(&event);

		switch(event.type)
		{
			case SDL_QUIT:
			{
				quit=true;
			}
			break;
		}

		//SDL_RenderCopy(renderer, texture2, NULL, NULL);
		SDL_RenderCopy(renderer, texture, NULL, NULL);
		SDL_RenderPresent(renderer);
	}

	SDL_DestroyTexture(texture);
	SDL_FreeSurface(image);
	SDL_DestroyRenderer(renderer);
	SDL_DestroyWindow(window);

	SDL_Quit();

	return 0;
}


// Sampler for integer pixel reads: unnormalized coordinates, clamped at the
// image edge. read_imageui only returns defined values with nearest
// filtering, so CLK_FILTER_LINEAR must not be used here.
constant sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

/*
 * Convolves `in` with a square filter and writes the result to `out`.
 *
 * Must be launched with one work-item per pixel: the global size is used as
 * the image width and height.
 *
 *   in               - source image, read as unsigned integer channels
 *                      through the file-scope sampler `smp`
 *   out              - destination image, same dimensions as `in`
 *   filter           - row-major coefficients; must hold
 *                      (2*half_filter_size + 1)^2 floats, because offsets
 *                      -half_filter_size..+half_filter_size are visited on
 *                      both axes
 *   half_filter_size - filter radius in pixels
 *
 * Each channel is scaled to 0..1, filtered, clamped to 0..1, scaled back to
 * 0..255 and offset by 128. Pixels closer than half_filter_size to an image
 * edge are not filtered and receive the "no response" value 128.
 */
void kernel convolute(__read_only image2d_t in, __write_only image2d_t out, __constant float* filter, int half_filter_size)
{
	const int image_w = get_global_size(0);
	const int image_h = get_global_size(1);
	const int center_x = get_global_id(0);
	const int center_y = get_global_id(1);
	const int2 center = (int2)(center_x, center_y);

	if (
		center_x < half_filter_size ||
		center_x > image_w - half_filter_size - 1 ||
		center_y < half_filter_size ||
		center_y > image_h - half_filter_size - 1
	)
	{
		// Every pixel of `out` has to be written: skipping the border would
		// leave whatever the device memory happened to contain. 128 is what a
		// zero filter response produces below.
		write_imageui(out, center, (uint4)(128));
		return;
	}

	// perform convolution
	int fIndex = 0;
	float4 sum = (float4)(0.0f);

	// r walks filter rows (image y), c walks filter columns (image x), so a
	// row-major filter is applied in the orientation in which it is written.
	for (int r = -half_filter_size; r <= half_filter_size; r++)
	{
		const int y = center_y + r;

		for (int c = -half_filter_size; c <= half_filter_size; c++)
		{
			const int x = center_x + c;
			const float4 pixel = convert_float4(read_imageui(in, smp, (int2)(x, y))) / 255.0f;
			sum += pixel * filter[fIndex];
			fIndex++;
		}
	}

	// clamp() takes the vector first and scalar bounds after it.
	sum = clamp(sum, 0.0f, 1.0f);
	const uint4 uint_sum = convert_uint4_sat_rte(sum * 255.0f) + (uint4)(128);
	write_imageui(out, center, uint_sum);
}

Any ideas why sometimes I get a nearly blank screen and sometimes I get the filtered image?

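Solved. The error was that the filter was being accessed out of bounds when its size is even: the kernel loops from -half_filter_size to +half_filter_size on both axes, so it reads (2*(filter_size/2)+1)^2 coefficients. For filter_size = 4 that is 25 values read from a 16-element buffer.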