Hi All, I am new to vulkan and SPIRV. Added some new changes to TVM vulkan backend to support Vulkan images. I am getting zero output in TVM as compared to CPU results. Here are the generated SPIRV and their convereted GLSL.
The SPIR-V is generated for simple convolution operation.
is there something wrong with below glsl generated from spirvs
GLSL - 1
#version 450
layout(local_size_x = 25, local_size_y = 1, local_size_z = 1) in;
layout(set = 0, binding = 0, std430) buffer p0
{
float _m0[];
} p0_1;
layout(set = 0, binding = 0, rgba32f) uniform writeonly image2D T_layout_trans;
void main()
{
imageStore(T_layout_trans, ivec2(int(gl_LocalInvocationID.x) - 5 * (int(gl_LocalInvocationID.x) / 5), int(gl_LocalInvocationID.x) / 5), vec4(p0_1._m0[int(gl_LocalInvocationID.x)], p0_1._m0[int(gl_LocalInvocationID.x) + 25], p0_1._m0[int(gl_LocalInvocationID.x) + 50], p0_1._m0[int(gl_LocalInvocationID.x) + 75]));
}
GLSL-2
#version 450
#if defined(GL_EXT_control_flow_attributes)
#extension GL_EXT_control_flow_attributes : require
#define SPIRV_CROSS_FLATTEN [[flatten]]
#define SPIRV_CROSS_BRANCH [[dont_flatten]]
#define SPIRV_CROSS_UNROLL [[unroll]]
#define SPIRV_CROSS_LOOP [[dont_unroll]]
#else
#define SPIRV_CROSS_FLATTEN
#define SPIRV_CROSS_BRANCH
#define SPIRV_CROSS_UNROLL
#define SPIRV_CROSS_LOOP
#endif
layout(local_size_x = 2, local_size_y = 1, local_size_z = 2) in;
struct _30
{
vec4 _m0[8];
};
layout(set = 0, binding = 0, std430) buffer p1
{
vec4 _m0[];
} p1_1;
layout(set = 0, binding = 1, std430) buffer T_relu
{
vec4 _m0[];
} T_relu_1;
layout(set = 0, binding = 0, rgba32f) uniform readonly image2D p0;
layout(set = 0, binding = 1, rgba32f) uniform readonly image2D p2;
void main()
{
_30 compute;
compute._m0[0] = vec4(0.0);
compute._m0[2] = vec4(0.0);
compute._m0[4] = vec4(0.0);
compute._m0[6] = vec4(0.0);
compute._m0[1] = vec4(0.0);
compute._m0[3] = vec4(0.0);
compute._m0[5] = vec4(0.0);
compute._m0[7] = vec4(0.0);
for (int _72 = 0; _72 < 3; _72++)
{
for (int _78 = 0; _78 < 3; _78++)
{
SPIRV_CROSS_UNROLL
for (int _84 = 0; _84 < 4; _84++)
{
compute._m0[0] += (imageLoad(p0, ivec2((int(gl_LocalInvocationID.x) * 2) + _78, _72)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
compute._m0[2] += (imageLoad(p0, ivec2((int(gl_LocalInvocationID.x) * 2) + _78, _72 + 2)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
compute._m0[4] += (imageLoad(p0, ivec2((int(gl_LocalInvocationID.x) * 2) + _78, _72 + 4)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
compute._m0[6] += (imageLoad(p0, ivec2((int(gl_LocalInvocationID.x) * 2) + _78, _72 + 6)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
compute._m0[1] += (imageLoad(p0, ivec2(((int(gl_LocalInvocationID.x) * 2) + _78) + 4, _72)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
compute._m0[3] += (imageLoad(p0, ivec2(((int(gl_LocalInvocationID.x) * 2) + _78) + 4, _72 + 2)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
compute._m0[5] += (imageLoad(p0, ivec2(((int(gl_LocalInvocationID.x) * 2) + _78) + 4, _72 + 4)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
compute._m0[7] += (imageLoad(p0, ivec2(((int(gl_LocalInvocationID.x) * 2) + _78) + 4, _72 + 6)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
}
}
}
vec4 _252 = compute._m0[0] + imageLoad(p2, ivec2(int(gl_LocalInvocationID.z), 0));
T_relu_1._m0[((int(gl_LocalInvocationID.z) * 16) + (int(gl_LocalInvocationID.x) * 4)) / 4] = mix(vec4(0.0), _252, greaterThan(_252, vec4(0.0)));
vec4 _268 = compute._m0[2] + imageLoad(p2, ivec2(int(gl_LocalInvocationID.z), 0));
T_relu_1._m0[(((int(gl_LocalInvocationID.z) * 16) + (int(gl_LocalInvocationID.x) * 4)) + 8) / 4] = mix(vec4(0.0), _268, greaterThan(_268, vec4(0.0)));
}
GLSL - 3
#version 450
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;
layout(set = 0, binding = 0, std430) buffer T_layout_trans
{
float _m0[];
} T_layout_trans_1;
layout(set = 0, binding = 1, std430) buffer p0
{
float _m0[];
} p0_1;
void main()
{
T_layout_trans_1._m0[int(gl_LocalInvocationID.x)] = p0_1._m0[int(gl_LocalInvocationID.x) * 4];
T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 4] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 1];
T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 8] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 2];
T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 12] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 3];
T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 16] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 16];
T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 20] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 17];
T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 24] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 18];
T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 28] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 19];
}