I have the kernel below. My question is: why is vstore8 not working? When the output is printed in the host code, it contains only 0s. I put an "if(all(v == 0) == 1)" check in the code to see whether the error was introduced when I copy the values from the int4* input into the int8 vector v, but that was not the cause. It seems like vstoren is doing nothing. I am new to OpenCL, so any help is appreciated.
/*
 * select_vec - build an int8 result from input2, steered by the values in input1.
 *
 * For work-item i (global id in dimension 0):
 *   - v  is formed from input1[2*i] and input1[2*i + 1] (two int4 -> one int8);
 *   - v1 is input2[0..7] and v2 is input2[8..15] (identical for every work-item);
 *   - if any component of v is greater than 10, the result takes v1 in the
 *     lanes where v > 10 and v2 in the remaining lanes;
 *   - otherwise the result is the low half of v1 followed by the low half of v2;
 *   - the result is written to output[8*i .. 8*i + 7].
 *
 * Sizes implied by the indexing, with N = global size in dimension 0:
 *   input1 holds at least 2*N int4, input2 at least 16 int,
 *   output at least 8*N int.
 */
__kernel void select_vec(__global int4 *input1,
                         __global int *input2,
                         __global int *output)
{
    size_t i = get_global_id(0);

    /* Join this work-item's two int4 elements into one int8. */
    int8 v = (int8)(input1[2 * i], input1[2 * i + 1]);

    int8 v1 = vload8(0, input2);
    int8 v2 = vload8(1, input2);

    int8 results;
    if (any(v > 10)) {
        /*
         * A vector comparison yields -1 (all bits set) in true lanes, and
         * select(a, b, c) returns b where the MSB of c is set and a elsewhere.
         * So v1 must be the second operand to be chosen where v > 10.
         */
        results = select(v2, v1, v > 10);
    } else {
        /* No component exceeds 10: low half of v1, then low half of v2. */
        results = (int8)(v1.lo, v2.lo);
    }

    /*
     * Offset is in units of 8 ints. Using the work-item id gives each
     * work-item its own slot; a constant offset would make all work-items
     * write to the same eight ints.
     */
    vstore8(results, i, output);
}