Hi,

this is my test kernel code:

```
// Test kernel: each work-item selects one triangle of `mesh`, orders the
// triangle's l0/l1/l2 points by their y component into top/middle/bottom,
// and then scales and rounds `top` by the resolution.
//
// mesh             - read for num_triangles (bounds check) and
//                    triangles_offset (start index into `triangles`)
// triangles        - triangle array; element [triangles_offset + global id]
//                    is the one processed by this work-item
// res_x, res_y     - resolution; (res - 1) is the scale factor applied to top
// triangle_mapping - not accessed anywhere in the body shown here
//
// NOTE(review): nothing is written back to global memory in this body, so
// the computed values are never observable from the host.
// NOTE(review): l0/l1/l2 are presumably lightmap coordinates normalised to
// [0, 1] (they are multiplied by res - 1) - confirm against the Triangle
// definition, which is not visible here.
__kernel void
calc_triangle_mapping(__global const Mesh* mesh, __global Triangle* triangles,
const int res_x, const int res_y, __global int* triangle_mapping) {
// one work-item per triangle: dimension-0 global id is the triangle index
int index = get_global_id(0);
__global Triangle *t;
// NOTE(review): the locals l0, l1, l2, the inc* floats, right_side, begin
// and end are declared but never used in the code shown.
Point2D l0, l1, l2;
Point2D top, bottom, middle;
float inc, inc_bottom_top, inc_bottom_middle, inc_middle_top;
bool right_side = false;
int begin, end;
// check bounds: work-items beyond the mesh's triangle count do nothing
if (index >= mesh->num_triangles)
return;
// get the triangle of the current thread
t = &triangles[mesh->triangles_offset + index];
// detect top, middle and bottom lightmap coordinate
// (top has the largest y, bottom the smallest; ties resolve via >=)
if (t->l0.y >= t->l1.y) {
if (t->l0.y >= t->l2.y) {
// l0 is highest; order l1 and l2 below it
top = t->l0;
if (t->l1.y >= t->l2.y) {
middle = t->l1;
bottom = t->l2;
}
else {
middle = t->l2;
bottom = t->l1;
}
}
else {
// l2 > l0 >= l1
top = t->l2;
middle = t->l0;
bottom = t->l1;
}
}
else {
if (t->l1.y >= t->l2.y) {
// l1 is highest; order l0 and l2 below it
top = t->l1;
if (t->l0.y >= t->l2.y) {
middle = t->l0;
bottom = t->l2;
}
else {
middle = t->l2;
bottom = t->l0;
}
}
else {
// l2 > l1 > l0
top = t->l2;
middle = t->l1;
bottom = t->l0;
}
}
// when middle and bottom share the same y, make bottom the one with the
// smaller x by swapping them if needed
if ((middle.y == bottom.y) && (bottom.x > middle.x)) {
Point2D tmp = bottom;
bottom = middle;
middle = tmp;
}
// for testing purposes repeat a round operation
// NOTE: each pass re-multiplies the already scaled and rounded value by
// (res - 1), so after three passes top is scaled by (res - 1)^3; the
// repetition only serves to add instructions to the kernel.
top.x = round(top.x * (res_x-1));
top.y = round(top.y * (res_y-1));
top.x = round(top.x * (res_x-1));
top.y = round(top.y * (res_y-1));
top.x = round(top.x * (res_x-1));
top.y = round(top.y * (res_y-1));
}
```

I cannot imagine that the instruction limit is that low. Maybe I’m doing something wrong when building the program? Can you specify the compute capability before building the program? Any suggestions?

Daniel