I have written simple code to solve advection equation using OpenC, and writing the results into netcdf file. The code does not produce any error messages during compilation, and the it runs without any errors. But it seems that the kernel is not doing anything. The kernel is looping the numerical scheme about 3000 times, and if it works correctly I should be seeing something very different. Is there a way to find out if the kernel is working properly, something like printing?
Below is the kernel
void pbndry(int in_x_siz, int in_y_siz, int in_z_siz, global float *in_arr)
{
int i,j,k;
// Periodic boundary
// x-direction
for(k=1;k<in_z_siz+1;k++)
for(j=1;j<in_y_siz+1;j++)
{
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 0] =
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + in_x_siz];
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + (in_x_siz+1)] =
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 1];
}
// y-direction
for(k=1;k<in_z_siz+1;k++)
for(i=1;i<in_x_siz+1;i++)
{
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 0 * (in_x_siz+2) + i] =
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + in_y_siz * (in_x_siz+2) + i];
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + (in_y_siz+1) * (in_x_siz+2) + i] =
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 1 * (in_x_siz+2) + i];
}
// z-direction
for(j=1;j<in_y_siz+1;j++)
for(i=1;i<in_x_siz+1;i++)
{
in_arr[0 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
in_arr[in_z_siz * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];
in_arr[(in_z_siz+1) * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
in_arr[1 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];
}
}
kernel void leapfrog3d(
const int x_siz,
const int y_siz,
const int z_siz,
const int t_siz,
global float *in_p_tf,
global float *in_p_tn,
global float *in_p_tp,
const float u_vel,
const float v_vel,
const float w_vel,
const float c,
global float *in_p_rs
)
{
int nx = x_siz;
int ny = y_siz;
int nz = z_siz;
int nt = t_siz;
float u = u_vel;
float v = v_vel;
float w = w_vel;
float C = c ;
int i = get_global_id(0);
int j = get_global_id(1);
int k = get_global_id(2);
int idx0, idx_i0, idx_i1, idx_j0, idx_j1, idx_k0, idx_k1;
for(int t=1;t<t_siz;t++)
{
idx0 = i + j * (nx+2) + k * (nx+2) * (ny+2);
idx_i0 = (i+1) + j * (nx+2) + k * (nx+2) * (ny+2);
idx_j0 = i + (j+1) * (nx+2) + k * (nx+2) * (ny+2);
idx_k0 = i + j * (nx+2) + (k+1) * (nx+2) * (ny+2);
idx_i1 = (i-1) + j * (nx+2) + k * (nx+2) * (ny+2);
idx_j1 = i + (j-1) * (nx+2) + k * (nx+2) * (ny+2);
idx_k1 = i + j * (nx+2) + (k-1) * (nx+2) * (ny+2);
in_p_tf[idx0] = in_p_tp[idx0]
- u_vel * C * (in_p_tn[idx_i0] - in_p_tn[idx_i1])
- v_vel * C * (in_p_tn[idx_j0] - in_p_tn[idx_j1])
- w_vel * C * (in_p_tn[idx_k0] - in_p_tn[idx_k1]);
pbndry(nx,ny,nz,in_p_tf);
in_p_tp = in_p_tn;
in_p_tn = in_p_tf;
}
in_p_rs = in_p_tf;
}