Help me find the issue in my GLSL and SPIR-V: I am getting zero output

Hi all, I am new to Vulkan and SPIR-V. I added some changes to the TVM Vulkan backend to support Vulkan images. With these changes the TVM output is all zeros, whereas the CPU results are correct. Below are the generated SPIR-V modules, converted to GLSL.

The SPIR-V is generated for a simple convolution operation.
Is there something wrong with the GLSL below, which was generated from the SPIR-V modules?

GLSL - 1

#version 450
// Interface of the first kernel. One workgroup holds 25 invocations along x.
layout(local_size_x = 25, local_size_y = 1, local_size_z = 1) in;

// Storage buffer holding an unsized array of floats; main() only reads from it.
// NOTE(review): this buffer and the storage image T_layout_trans declared below are
// both assigned set = 0, binding = 0, and main() statically uses both. A descriptor
// binding has exactly one descriptor type, so presumably only one of the two can be
// bound correctly by the host. Verify the binding numbers the backend emits for
// image arguments; this is a likely cause of the all-zero output.
layout(set = 0, binding = 0, std430) buffer p0
{
    float _m0[];
} p0_1;

// Write-only rgba32f 2D storage image that main() stores into.
layout(set = 0, binding = 0, rgba32f) uniform writeonly image2D T_layout_trans;

// Entry point of the first kernel.
// With i = gl_LocalInvocationID.x (0..24 given local_size_x = 25), each invocation
// writes one texel at coordinate (i - 5 * (i / 5), i / 5), i.e. (i % 5, i / 5).
// The texel's four components are p0[i], p0[i + 25], p0[i + 50] and p0[i + 75].
// NOTE(review): presumably this packs four 5x5 planes of a buffer into the RGBA
// channels of a 5x5 image; confirm against the intended layout transform.
// NOTE(review): only gl_LocalInvocationID is used, so every workgroup would touch
// the same elements; assumes the kernel is dispatched as a single workgroup (confirm).
void main()
{
    imageStore(T_layout_trans, ivec2(int(gl_LocalInvocationID.x) - 5 * (int(gl_LocalInvocationID.x) / 5), int(gl_LocalInvocationID.x) / 5), vec4(p0_1._m0[int(gl_LocalInvocationID.x)], p0_1._m0[int(gl_LocalInvocationID.x) + 25], p0_1._m0[int(gl_LocalInvocationID.x) + 50], p0_1._m0[int(gl_LocalInvocationID.x) + 75]));
}

GLSL - 2


#version 450
// Control-flow hint macros: they expand to [[flatten]] / [[dont_flatten]] /
// [[unroll]] / [[dont_unroll]] when GL_EXT_control_flow_attributes is available,
// and to nothing otherwise.
#if defined(GL_EXT_control_flow_attributes)
#extension GL_EXT_control_flow_attributes : require
#define SPIRV_CROSS_FLATTEN [[flatten]]
#define SPIRV_CROSS_BRANCH [[dont_flatten]]
#define SPIRV_CROSS_UNROLL [[unroll]]
#define SPIRV_CROSS_LOOP [[dont_unroll]]
#else
#define SPIRV_CROSS_FLATTEN
#define SPIRV_CROSS_BRANCH
#define SPIRV_CROSS_UNROLL
#define SPIRV_CROSS_LOOP
#endif
// One workgroup holds 2 x 1 x 2 invocations; main() indexes by the x and z components.
layout(local_size_x = 2, local_size_y = 1, local_size_z = 2) in;

// Wrapper around an array of eight vec4 values; main() uses it as its local accumulator.
struct _30
{
    vec4 _m0[8];
};

// Storage buffer of vec4 values that main() reads inside its loop nest.
// NOTE(review): shares set = 0, binding = 0 with the storage image p0 declared below.
layout(set = 0, binding = 0, std430) buffer p1
{
    vec4 _m0[];
} p1_1;

// Storage buffer of vec4 values that main() writes its results to.
// NOTE(review): shares set = 0, binding = 1 with the storage image p2 declared below.
layout(set = 0, binding = 1, std430) buffer T_relu
{
    vec4 _m0[];
} T_relu_1;

// Read-only rgba32f storage images loaded by main().
// NOTE(review): these reuse bindings 0 and 1, which the buffers p1 and T_relu already
// occupy. A descriptor binding has exactly one descriptor type, so presumably the
// buffers and images cannot all be bound correctly by the host. Verify that the
// backend assigns a unique binding per kernel argument; this is a likely cause of
// the all-zero output.
layout(set = 0, binding = 0, rgba32f) uniform readonly image2D p0;
layout(set = 0, binding = 1, rgba32f) uniform readonly image2D p2;

// Entry point of the second kernel.
// It accumulates eight vec4 sums over a 3 x 3 x 4 loop nest of image texels from p0
// multiplied by vec4 entries of p1. It then adds a texel of p2 to two of the sums,
// clamps negative components to zero, and stores both results to T_relu.
// NOTE(review): presumably a 3x3 convolution followed by bias add and ReLU; confirm.
// NOTE(review): only gl_LocalInvocationID is used for indexing; assumes the kernel is
// dispatched as a single workgroup (confirm).
void main()
{
    // Zero all eight accumulators.
    _30 compute;
    compute._m0[0] = vec4(0.0);
    compute._m0[2] = vec4(0.0);
    compute._m0[4] = vec4(0.0);
    compute._m0[6] = vec4(0.0);
    compute._m0[1] = vec4(0.0);
    compute._m0[3] = vec4(0.0);
    compute._m0[5] = vec4(0.0);
    compute._m0[7] = vec4(0.0);
    // _72 offsets the image y coordinate and _78 the image x coordinate (0..2 each).
    for (int _72 = 0; _72 < 3; _72++)
    {
        for (int _78 = 0; _78 < 3; _78++)
        {
            // NOTE(review): _84 (0..3) appears only in the p1 index, never in the image
            // coordinate, and each product is a component-wise vec4 * vec4. The same
            // full texel is therefore multiplied by four different p1 entries. If _84
            // is meant to select one channel of the texel, that component selection
            // is missing from the generated code; confirm against the intended math.
            SPIRV_CROSS_UNROLL
            for (int _84 = 0; _84 < 4; _84++)
            {
                // All eight updates share one p1 index:
                // z * 36 + _84 * 9 + _72 * 3 + _78, with z = gl_LocalInvocationID.z.
                // Accumulators 0/2/4/6 read x = 2 * lid.x + _78 at y = _72 + {0, 2, 4, 6}.
                compute._m0[0] += (imageLoad(p0, ivec2((int(gl_LocalInvocationID.x) * 2) + _78, _72)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
                compute._m0[2] += (imageLoad(p0, ivec2((int(gl_LocalInvocationID.x) * 2) + _78, _72 + 2)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
                compute._m0[4] += (imageLoad(p0, ivec2((int(gl_LocalInvocationID.x) * 2) + _78, _72 + 4)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
                compute._m0[6] += (imageLoad(p0, ivec2((int(gl_LocalInvocationID.x) * 2) + _78, _72 + 6)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
                // Accumulators 1/3/5/7 read the same rows at x shifted by +4.
                compute._m0[1] += (imageLoad(p0, ivec2(((int(gl_LocalInvocationID.x) * 2) + _78) + 4, _72)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
                compute._m0[3] += (imageLoad(p0, ivec2(((int(gl_LocalInvocationID.x) * 2) + _78) + 4, _72 + 2)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
                compute._m0[5] += (imageLoad(p0, ivec2(((int(gl_LocalInvocationID.x) * 2) + _78) + 4, _72 + 4)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
                compute._m0[7] += (imageLoad(p0, ivec2(((int(gl_LocalInvocationID.x) * 2) + _78) + 4, _72 + 6)) * p1_1._m0[(((int(gl_LocalInvocationID.z) * 36) + (_84 * 9)) + (_72 * 3)) + _78]);
            }
        }
    }
    // Only accumulators 0 and 2 are consumed below; 1, 3, 4, 5, 6 and 7 are computed
    // but never read.
    // Each result adds the p2 texel at (lid.z, 0); mix() with the greaterThan() mask
    // keeps components that are > 0 and replaces the rest with 0.
    // The destination indices reduce to z * 4 + x and z * 4 + x + 2.
    vec4 _252 = compute._m0[0] + imageLoad(p2, ivec2(int(gl_LocalInvocationID.z), 0));
    T_relu_1._m0[((int(gl_LocalInvocationID.z) * 16) + (int(gl_LocalInvocationID.x) * 4)) / 4] = mix(vec4(0.0), _252, greaterThan(_252, vec4(0.0)));
    vec4 _268 = compute._m0[2] + imageLoad(p2, ivec2(int(gl_LocalInvocationID.z), 0));
    T_relu_1._m0[(((int(gl_LocalInvocationID.z) * 16) + (int(gl_LocalInvocationID.x) * 4)) + 8) / 4] = mix(vec4(0.0), _268, greaterThan(_268, vec4(0.0)));
}


GLSL - 3


#version 450
// Interface of the third kernel. One workgroup holds 4 invocations along x.
layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in;

// Storage buffer of floats that main() writes to (set 0, binding 0).
layout(set = 0, binding = 0, std430) buffer T_layout_trans
{
    float _m0[];
} T_layout_trans_1;

// Storage buffer of floats that main() reads from (set 0, binding 1).
// Unlike the other two kernels, the two resources here use distinct bindings.
layout(set = 0, binding = 1, std430) buffer p0
{
    float _m0[];
} p0_1;

// Entry point of the third kernel.
// With i = gl_LocalInvocationID.x (0..3 given local_size_x = 4), each invocation
// copies eight floats from p0 to T_layout_trans. For k = 0..3:
//   T[i + 4 * k]      = p0[4 * i + k]
//   T[i + 16 + 4 * k] = p0[4 * i + 16 + k]
// Across the four invocations this transposes a 4 x 4 tile within each of two
// consecutive 16-float halves.
// NOTE(review): presumably this unpacks the vec4-packed output of the previous kernel
// into a planar layout; confirm against the intended layout transform.
// NOTE(review): only gl_LocalInvocationID is used for indexing; assumes the kernel is
// dispatched as a single workgroup (confirm).
void main()
{
    // First 16-float half.
    T_layout_trans_1._m0[int(gl_LocalInvocationID.x)] = p0_1._m0[int(gl_LocalInvocationID.x) * 4];
    T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 4] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 1];
    T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 8] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 2];
    T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 12] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 3];
    // Second 16-float half.
    T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 16] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 16];
    T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 20] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 17];
    T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 24] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 18];
    T_layout_trans_1._m0[int(gl_LocalInvocationID.x) + 28] = p0_1._m0[(int(gl_LocalInvocationID.x) * 4) + 19];
}