How to check if OpenCL kernel is working properly?

I have written simple code to solve advection equation using OpenC, and writing the results into netcdf file. The code does not produce any error messages during compilation, and the it runs without any errors. But it seems that the kernel is not doing anything. The kernel is looping the numerical scheme about 3000 times, and if it works correctly I should be seeing something very different. Is there a way to find out if the kernel is working properly, something like printing?

Below is the kernel

void pbndry(int in_x_siz, int in_y_siz, int in_z_siz, global float *in_arr)
{
   int i,j,k;

   // Periodic boundary
   // x-direction
   for(k=1;k<in_z_siz+1;k++)
      for(j=1;j<in_y_siz+1;j++)
         {
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 0] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + in_x_siz];

         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + (in_x_siz+1)] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 1];
         }

   // y-direction
   for(k=1;k<in_z_siz+1;k++)
      for(i=1;i<in_x_siz+1;i++)
         {
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 0 * (in_x_siz+2) + i] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + in_y_siz * (in_x_siz+2) + i];

         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + (in_y_siz+1) * (in_x_siz+2) + i] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 1 * (in_x_siz+2) + i];
         }

   // z-direction
   for(j=1;j<in_y_siz+1;j++)
      for(i=1;i<in_x_siz+1;i++)
         {
         in_arr[0 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
         in_arr[in_z_siz * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];

         in_arr[(in_z_siz+1) * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
         in_arr[1 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];
         }
}


kernel void leapfrog3d(
                        const  int x_siz,
                        const  int y_siz,
                        const  int z_siz,
                        const  int t_siz,
                        global float *in_p_tf,
                        global float *in_p_tn,
                        global float *in_p_tp,
                        const  float u_vel,
                        const  float v_vel,
                        const  float w_vel,
                        const  float c,
                        global float *in_p_rs
                      )
{
   int nx   =  x_siz;
   int ny   =  y_siz;
   int nz   =  z_siz;
   int nt   =  t_siz;
   float u  =  u_vel;
   float v  =  v_vel;
   float w  =  w_vel;
   float C  =  c    ;
   int i    =  get_global_id(0);
   int j    =  get_global_id(1);
   int k    =  get_global_id(2);

   int idx0, idx_i0, idx_i1, idx_j0, idx_j1, idx_k0, idx_k1;


   for(int t=1;t<t_siz;t++)
   {

      idx0     =  i + j * (nx+2) + k * (nx+2) * (ny+2);

      idx_i0   =  (i+1) + j * (nx+2) + k * (nx+2) * (ny+2);
      idx_j0   =  i + (j+1) * (nx+2) + k * (nx+2) * (ny+2);
      idx_k0   =  i + j * (nx+2) + (k+1) * (nx+2) * (ny+2);
      
      idx_i1   =  (i-1) + j * (nx+2) + k * (nx+2) * (ny+2);
      idx_j1   =  i + (j-1) * (nx+2) + k * (nx+2) * (ny+2);
      idx_k1   =  i + j * (nx+2) + (k-1) * (nx+2) * (ny+2);

      in_p_tf[idx0] = in_p_tp[idx0] 
                   - u_vel * C * (in_p_tn[idx_i0] - in_p_tn[idx_i1])
                   - v_vel * C * (in_p_tn[idx_j0] - in_p_tn[idx_j1])
                   - w_vel * C * (in_p_tn[idx_k0] - in_p_tn[idx_k1]);

      pbndry(nx,ny,nz,in_p_tf);

      in_p_tp = in_p_tn;
      in_p_tn = in_p_tf;
   }

   in_p_rs = in_p_tf;

}

You can use the printf function in your kernel to have debug output.

kernel void leapfrog3d( ...
{
...
idx0     =  i + j * (nx+2) + k * (nx+2) * (ny+2);
printf("%d\n", idx0);
}
1 Like

Thanks @devent, it was helpful for me

I don’t know why but printf() is giving me OUT_OF_RESOURCES error when enqueuing clEnqueueReadBuffer().