Dear all,

I am trying to write code for image processing.

I want to write one kernel and call it from another kernel. Is that possible?

I have learned that a kernel can call ordinary functions. I would also like to call a kernel in the same way.

Please help me in this regard.

Here is where I currently stand.

//creatematrix.cl

/*
 * createMat - allocate an iRows x iColumns matrix of floats and fill it.
 *
 * The matrix is an array of iRows row pointers, each pointing to a separately
 * allocated array of iColumns floats; every element is set to Initvalue.
 *
 * Returns the array of row pointers, or NULL when a dimension is not positive
 * or when an allocation fails (rows allocated so far are released first).
 * The caller owns the result and must free() every row, then the row array.
 *
 * NOTE: this relies on malloc(), and OpenCL C device code has no C standard
 * library (no <stdlib.h>, no dynamic allocation). This helper can therefore
 * only be built as host-side C; it cannot be #included into a .cl program.
 */
float ** createMat(int iRows, int iColumns, float Initvalue)
{
    if (iRows <= 0 || iColumns <= 0)
    {
        return NULL;
    }

    float **pMatrix = malloc(sizeof *pMatrix * (size_t)iRows);
    if (pMatrix == NULL)
    {
        return NULL;
    }

    for (int i = 0; i < iRows; i++)
    {
        pMatrix[i] = malloc(sizeof *pMatrix[i] * (size_t)iColumns);
        if (pMatrix[i] == NULL)
        {
            /* Undo the partial allocation so nothing leaks. */
            while (i-- > 0)
            {
                free(pMatrix[i]);
            }
            free(pMatrix);
            return NULL;
        }

        for (int j = 0; j < iColumns; j++)
        {
            pMatrix[i][j] = Initvalue;
        }
    }

    return pMatrix;
}

// Convolution.cl

__kernel float* IMConvolution(const __global float * pInput,

__constant float * pFilter,

const int nInWidth,

const int nFilterWidth,

const int nWidth)

{

float *pOutput;

```
//const int nWidth = get_global_size(0);
const int xOut = get_global_id(0);
const int yOut = get_global_id(1);
const int xInTopLeft = xOut;
const int yInTopLeft = yOut;
float sum = 0;
for (int r = 0; r < nFilterWidth; r++)
{
const int idxFtmp = r * nFilterWidth;
const int yIn = yInTopLeft + r;
const int idxIntmp = yIn * nInWidth + xInTopLeft;
for (int c = 0; c < nFilterWidth; c++)
{
const int idxF = idxFtmp + c;
const int idxIn = idxIntmp + c;
sum += pFilter[idxF]*pInput[idxIn];
}
} //for (int r = 0...
/*if (sum > 1)
sum = 1;
if (sum < -1)
sum = -1;*/
const int idxOut = yOut * nWidth + xOut;
pOutput[idxOut] = sum;
return(pOutput);
```

}

// CNNonGPU.cl

#include "Convolution.cl"

#include "creatematrix.cl"

/*
 * CNNonGPU - iterative filter built from two convolutions (tempA and tempB).
 *
 * As written, the body appears to intend:
 *   TB = IMConvolution(pInput, tempB)                 (once)
 *   repeat `iterations` times:
 *     TA  = IMConvolution(current image, tempA)
 *     sum = TB + TA + bias
 *     out = 0.5 * (|sum + 1| - |sum - 1|)
 *     copy `out` back into the current image
 *   finally copy the interior of the last `out` into pOutput.
 *
 * Parameters (as used below):
 *   pInput        input image; also aliased and written through extYimg
 *   tempA, tempB  filters passed to IMConvolution
 *   pOutput       receives the final result
 *   nInWidth, nInHeight  dimensions passed to createMat and used as loop bounds
 *   nFilterWidth  filter side length forwarded to IMConvolution
 *   iterations    number of passes of the outer loop
 *
 * NOTE(review): this block cannot build as OpenCL C; the individual problems
 * are flagged next to the lines that cause them. Nothing in this body uses
 * get_global_id(), so every work-item that runs it would execute the same
 * loops over the same indices.
 */
__kernel void CNNonGPU(const __global float * pInput,

__constant float * tempA,

__constant float * tempB,

__global float * pOutput,

const int nInWidth,

const int nInHeight,

const int nFilterWidth,

const int iterations)

{

```
const int nWidth = get_global_size(0);
float *TBimg, *Yimg, *extYimg, *TAimg;
float **tempimg;
int Elements = 0;
int inElements = 0;
float *sum;
// Element count is nWidth squared, i.e. a square output is assumed.
Elements = nWidth * nWidth;
// NOTE(review): inElements is assigned here but never read afterwards.
inElements = nInWidth * nInHeight;
// NOTE(review): `new` is C++ syntax. OpenCL C has no operator new and no
// dynamic allocation inside a kernel; scratch buffers have to be created by
// the host and passed in as kernel arguments.
TBimg = new float [Elements];
TAimg = new float [Elements];
sum = new float [Elements];
// NOTE(review): IMConvolution is used as a value-returning function, and the
// assignment discards the buffer stored in TBimg just above. A function
// declared __kernel must return void, so this call only works if
// IMConvolution is an ordinary helper - verify its declaration.
TBimg = IMConvolution(pInput,tempB,nInWidth,nFilterWidth,nWidth);
// NOTE(review): drops the `const` and `__global` qualifiers of pInput; the
// copy-back loop below then writes into the input image through extYimg.
extYimg = pInput;
for(int i = 0; i < iterations;i++)
{
Yimg = extYimg;
// Same pattern as for TBimg: the buffer from `new` is overwritten.
TAimg = IMConvolution(Yimg,tempA,nInWidth,nFilterWidth,nWidth);
for(int j = 0; j < Elements;j++)
{
// NOTE(review): `bias` is not declared in this function or its parameters.
sum[j] = TBimg[j] + TAimg[j] + bias;
}
// NOTE(review): a new matrix is requested on every iteration and nothing in
// this block releases the previous one. createMat as shown in the post uses
// malloc(), which is not available in OpenCL C device code.
tempimg = createMat(nInWidth,nInHeight,0);
// NOTE(review): `ind` is declared inside the iteration loop's body but is
// also assigned after that loop's closing brace, where it is out of scope.
int ind = 0;
// NOTE(review): iHeightExtended / iWidthExtended are not declared in the
// visible source. This `i` also shadows the iteration counter `i`.
for (int i = 1; i < iHeightExtended-1; i++)
{
for (int j = 1; j < iWidthExtended-1; j++)
{
// NOTE(review): the parentheses on the next line are unbalanced (one more
// "(" than ")"). Also, OpenCL C's abs() is defined for integer types; fabs()
// is the floating-point version.
tempimg[i][j] = 0.5 * ((abs(sum[ind] + 1) - (abs(sum[ind] - 1)));
ind++;
}
}
ind = 0;
// Copy tempimg back into the working image. Here the first index runs over
// nInWidth and the second over nInHeight, whereas the loop above indexes
// [row][column] - TODO confirm which orientation is intended.
for(int j = 0; j < nInHeight; j++)
{
for(int i = 0; i < nInWidth;i++)
{
extYimg[ind] = tempimg[i][j];
ind++;
}
}
}
// NOTE(review): `ind` is out of scope here (declared inside the loop above),
// and tempimg is never assigned when iterations <= 0.
ind = 0;
for (int i = 1; i < iHeightExtended-1; i++)
{
for (int j = 1; j < iWidthExtended-1; j++)
{
pOutput[ind] = tempimg[i][j];
ind++;
}
}
```

}

In CNNonGPU I want to use the convolution kernel many times, sometimes 1000 times or even more.

I also want to call the create-matrix function once. Is that possible?

I tried to run the code above by launching CNNonGPU from main() on the CPU (host) side, but clBuildProgram fails with the error CL_BUILD_PROGRAM_FAILURE.

Thanks in advance.