Well the problem I have is the following:

I have this kernel that tries to calculate the betweenness centrality of a graph in parallel. What happens is very strange: a loop is executed twice by one of the work-groups. On average, once every 7-8 runs, the second while loop ( while(count[k] < nr_roots) ) is executed twice by a work-group, even though count[k] is incremented during the first iteration. In my case I have a 12-vertex graph, nr_roots is 1 and count[k] is 0: the while loop body is executed, count[k] is incremented by 1, and yet count[k] is still seen as 0 one more time.

This happens only once every 6-7 runs, remember, not always. Does anyone have any idea why? I also tried making count a __local variable (shared by the work-group) and also a __private variable (private to a single work-item), with no success. Any tips or suggestions are more than welcome!

" while ( found_local != 0){

"

"

"

"

"

"

"

" if(i==0) { level_local = atomic_add(nr_level,0); atomic_xchg(found,0);

"

" pozition_local = atomic_add(pozition,0);

"

" nr_roots = atomic_add(&level[level_local],0)/j; atomic_xchg(&count[k],0); nr=0; rest = atomic_add(&level[level_local],0)%j;

"

" if(k<rest) nr_roots = nr_roots + 1;}

"

"

"

"

"

" barrier(CLK_GLOBAL_MEM_FENCE);

"

"

"

" while(count[k] < nr_roots ){

"

"

"

" if(i==0){

"

" root = stack[pozition_local + count[k]*j + k];
"
" succ_index[root] = 0;
"
" nr_neigh = firstnbr[root+1] - firstnbr[root]; }
"
" barrier(CLK_LOCAL_MEM_FENCE);
"
"
"
" neigh_per_thread = nr_neigh/size;
"
" if(i<nr_neigh%size)
"
" neigh_per_thread ++;
"
" h = 0;
"
" while(h<neigh_per_thread)
"
" {
"
" node = nbr[firstnbr[root] + size*h + i];

"

"

"

" dw = atomic_cmpxchg(&d[node], -1, level_local + 1);

"

"

"

" if(dw == -1)

"

" {

"

" atomic_inc(&level[level_local + 1]);

"

" atomic_cmpxchg(found,0,1);

"

" dw = level_local + 1;

"

" gh = atomic_inc(nr_stack);

"

" stack[gh] = node;

"

"

"

" }

"

"if(dw == level_local + 1)

"

" {

"

"

"

" temp = atomic_inc(&succ_index[root]);

"

" succ[firstnbr[root] + temp] = node;

"

" GetSemaphor2(&sem[0]); temporal = atomic_xchg(&sigma[node],0); temporal2=atomic_xchg(&sigma[root],sigma[root]);

"

" atomic_xchg(&sigma[node],temporal+temporal2);ReleaseSemaphor2(&sem[0]);

"

" }

"

"h++;

"

"}

"

"

"

"if(glob%6==1) {atomic_add(&count[k],1);if(root==4&&nr1==1) BC[8] = 1;}

"

" barrier(CLK_GLOBAL_MEM_FENCE); }

"

"

"

" barrier(CLK_LOCAL_MEM_FENCE);

"

"if(glob==0) {f= atomic_add(&level[level_local],0); atomic_add(pozition,f); atomic_add(nr_level,1);

"

" }

"

"

"

" if(i==0)

"

" { atomic_add(global_sync,1);

"

" if ( k==0) { while(atomic_add(global_sync,0)< j); atomic_xchg(global_sync, 0); }

"

" else { while(atomic_add(global_sync,0) > 0); }}

"

"barrier(CLK_LOCAL_MEM_FENCE);if(i==0) found_local = atomic_add(found,0);barrier(CLK_LOCAL_MEM_FENCE);

"

"} if(glob==11) BC[glob] = atomic_xchg(&sigma[11],sigma[11]); }

";