GLSL: Help! Why doesn't AMD like this simple shader?

Something goes wrong with this shader on my AMD chipset. It works fine on Nvidia, and with Direct3D on the same AMD chipset.

What the shader does is draw an old-school bitmap shadow underneath character models that conforms to the geometry under their feet, like a localized fog effect. What AMD does, as near as I can see, is twist the effect according to the model-view matrix. I mean, if you stand facing a certain direction in the world and look vertically down at the shadow, it looks correct, but otherwise it’s skewed according to the look direction.

I’ve tried everything and confirmed the x4mV constant (matrix) is correct by using it to compute the rest of the game’s geometry, and I’ve checked everything else. The constant matrices use layout(row_major) to match HLSL, and mul likewise takes its arguments in reverse order. (Edited: this, like many other things, is done with the preprocessor.) I’ve seen problems in GLSL when trying to convert matrices, but in this case the code just uses = to assign a mat4 to a mat4, plus transpose. It seems to me the problem is in the in and out matrix (Out_mat and In_mat), because I’ve ruled out everything else. Again, it works as expected on Nvidia. And I’m sorry, I don’t have any experience using visual debugging systems with shaders, or I might inspect the data myself. So far I’ve made it through life just tinkering with the shaders until they work.

NOTE: I’ve left code in so as not to be disorienting, but I’ve tried eliminating most of the minor code without any success. I think whatever the problem is, it’s something basic. More or less, it looks to me like mul(In.mat,pos) produces erroneous results, and if I put it in a visual debugger, that is what I would inspect. If someone wants to give me advice on visual debuggers, I’d be receptive. (EDITED: I’m also confident pos is correct, since many other shaders use this pattern and the symptoms don’t point to it. I’m trying to think of a test I could do to confirm it, but since it works on Nvidia, and other shaders using it work on AMD, I don’t think it’s the problem.)

	//FOREWORD (WARNING)
	//this is GLSL code inflected to mostly HLSL
	//keywords to ease copy-paste between files
	
		//SNIPPET (VERT SHADER ONLY)
	GLSL(
	//Vertex-stage interface of the shadow shader: a staging struct
	//that main fills in, the individual output varyings, and the
	//OUTPUT macro that copies the struct into those varyings.
	struct CLASSIC_SHADOW
	{
		/*
		float4 pos : POSITION;  
		float4 col : COLOR0;     
		float4 fog : COLOR1;     
		float4x4 mat : TEXCOORD0; 
		classic_stereo_DEPTH
		*/
		//(the block comment above keeps the original HLSL semantics
		//for reference)
		float4 col; float4x4 mat; float4 pos; //gl_Position

		//NOTE: fog is not read by the OUTPUT macro below
		float4 fog; //classic_stereo_pos?
	};
	//D3D11 rejects GL_EXT_shader_io_blocks
	//layout(location=0) out CLASSIC_SHADOW
	//{
		//NOTE: these locations must agree with the matching
		//layout(location=N) in declarations of the fragment stage
		layout(location=0) out float4 Out_col;
		layout(location=1) out float4 Out_pos; //Out_fog
		//a 4x4 matrix varying takes four consecutive locations (2..5)
		layout(location=2) out float4x4 Out_mat;
		//REMINDER: location=3,4,5 is float4x4...
		#ifdef DSTEREO
		layout(location=6) out float4 Xr_dpos;
		#endif
	//}Out;
	//OUTPUT(x): copies x.col and x.mat to the varyings and hands x.pos
	//to set_gl_Position (defined elsewhere). It does not assign Out_pos
	//directly. NOTE(review): presumably set_gl_Position also fills
	//Out_pos -- confirm, since the fragment stage reads In_pos.xy and .w
	#define OUTPUT(x) Out_col = x.col;\
	Out_mat = x.mat; set_gl_Position(x.pos);\n 
	)

		//SNIPPET (FRAG SHADER ONLY)	
	GLSL(
	//Fragment-stage interface of the shadow shader: the input struct,
	//the individual input varyings, the INPUT macro that packs them
	//into the struct, and the two-target output struct with its
	//OUTPUT macro.
	struct CLASSIC_SHADOW
	{
		/*
		float4 col : COLOR0;     		
		float4 fog : COLOR1; 
		float4x4 mat : TEXCOORD0; 
		#ifdef DSTEREO		
		float stereo : DEPTH; //needs vpos
		#endif
		#if 3<=SHADER_MODEL
		float2 vpos : VPOS; //Shader Model 3
		#endif	
		*/
		//member order matters: INPUT below uses the positional
		//struct constructor (col, mat, pos, vpos)
		float4 col; float4x4 mat; float4 pos; float2 vpos;
	};
	//D3D11 rejects GL_EXT_shader_io_blocks
	//layout(location=0) in CLASSIC_SHADOW
	//{
		//NOTE: these locations must agree with the matching
		//layout(location=N) out declarations of the vertex stage
		layout(location=0) in float4 In_col;
		layout(location=1) in float4 In_pos; //In_fog
		//a 4x4 matrix varying takes four consecutive locations (2..5)
		layout(location=2) in float4x4 In_mat;
		//REMINDER: location=3,4,5 is float4x4...
		#ifdef DSTEREO
		layout(location=6) in float4 Xr_dpos;
		#endif
	//}In;	
	//INPUT: builds a CLASSIC_SHADOW from the varyings, with vpos taken
	//from the window-space fragment coordinate gl_FragCoord.xy
	#define INPUT CLASSIC_SHADOW(In_col,In_mat,In_pos,gl_FragCoord.xy)\n
	//NOTE(review): no matching #if or #ifdef for the #endif below is
	//visible in this snippet (the conditionals in the struct are inside
	//a block comment) -- confirm it pairs with something outside it
	#endif
	struct CLASSIC_OUTPUT
	{
		/*
		float4 col:COLOR0, z:COLOR1;
		*/
		float4 col; float4 z; //MRT
	};
	//out struct CLASSIC_OUTPUT
	//{
		//two colour outputs, one per render target
		layout(location=0) out float4 Out_col;
		layout(location=1) out float4 Out_z;
	//}Out;
	//OUTPUT(x): copies x.col and x.z to the two fragment outputs
	#define OUTPUT(x) Out_col = x.col; Out_z = x.z;\n
	)
	
	GLSL(void main() //shadow() VERTEX SHADER
	{
		//Shadow vertex stage: positions one corner of the shadow
		//geometry around a centre point, builds the matrix that the
		//fragment stage multiplies its reconstructed position by, and
		//writes the per-shadow scale into Out_pos.z.
		CLASSIC_INPUT In = UNLIT_INPUT; //const
		CLASSIC_SHADOW Out;

		Out.col = In.col; 		

		//uv0.xyz is used as the centre point and uv0.w as the scale
		//applied to the vertex position to reach this corner
		float4 center,corner;				  
		center.xyz = In.uv0.xyz;		
		center.w = corner.w = 1.0;
		corner.xyz = In.pos.xyz*In.uv0.w+center.xyz;		
		//Out.pos = mul(x4mWVP,corner);
		//NOTE(review): classic_stereo_pos is a macro defined elsewhere;
		//presumably it assigns Out.pos from corner as the commented-out
		//line above did -- confirm
		classic_stereo_pos(corner)

		//REMINDER: this code was based on a technique which
		//projects geometry onto planes to make flat shadows
		
		//move the centre through x4mV, start the matrix from the
		//transpose of x4mV, then overwrite the w component of
		//Out.mat[0..2] with minus the dot product of the transformed
		//centre and that vector. Out.mat[3] is left as transposed.
		//NOTE(review): in plain GLSL m[i] selects column i, whereas in
		//HLSL m[i] selects row i -- confirm which one the loop intends
		center = mul(x4mV,center);
		Out.mat = transpose(x4mV);		
		for(int i=0;i<3;i++)
		Out.mat[i].w = -dot(center.xyz,Out.mat[i].xyz);

		OUTPUT(Out); //return Out;
		
		//2022: rename fog->pos and repurpose fog/pos.z
		//after set_gl_Position
		//Out.fog = Out.pos;		
		//Out_pos.z is written after OUTPUT, i.e. after set_gl_Position
		//ran; the fragment stage reads it back as In.pos.z to scale st.
		//NOTE(review): only .z of Out_pos is assigned in this function;
		//.xy and .w must come from set_gl_Position -- confirm
		float bias = EX_INI_SHADOWUVBIAS;
		Out_pos.z = 0.5/(In.uv0.w*EX_INI_SHADOWRADIUS/bias);
	}),
	GLSL(void main() //shadow() FRAGMENT SHADER
	{
		//Shadow fragment stage: rebuilds a position for this fragment,
		//multiplies it by the matrix passed from the vertex stage to get
		//shadow-texture coordinates plus a third term, and uses both to
		//attenuate the output alpha.
		CLASSIC_SHADOW In = INPUT; //const
		CLASSIC_OUTPUT Out;

		Out.col = In.col; 

		//perspective divide of the interpolated position, passed through
		//stereo_dpos (defined elsewhere), with w forced to 1
		float4 pos = float4(stereo_dpos(In.pos.xy/In.pos.w),1.0); 

		//scale xyz by the value sampled from sam1 at this fragment;
		//vpos*rcpViewport.xy turns gl_FragCoord.xy into a texture
		//coordinate. NOTE(review): presumably sam1 holds scene depth
		//-- confirm
		pos.xyz*=tex2D(sam1,In.vpos*rcpViewport.xy).x;
		
		//the .xzy swizzle puts the transformed x,z into st.xy and the
		//transformed y into st.z
		float3 st = mul(In.mat,pos).xzy; //!!			  		
		//In.pos.z is the scale written by the vertex stage; st.xy is
		//scaled by it and flipped about 0.5
		st.xy = vec2(0.5f)-st.xy*In.pos.z;				
		//1.9: should (probably) be 2 (squeezing every last drop)
		st.z*=In.pos.z*(EX_INI_SHADOWRADIUS/EX_INI_SHADOWVOLUME*1.9f); 		
		st.z*=st.z; //pow(st.z,2.0); 
		//screen-space derivatives of st.xy, clamped to [-0.5,0.5], are
		//passed as the explicit gradients for the sam0 lookup
		float4 dd = clamp(float4(ddx(st.xy),ddy(st.xy)),-0.5f,0.5f);
		Out.col.a*=textureGrad(sam0,st.xy,dd.xy,dd.zw).a; //tex2D
		//subtract the squared st.z term from alpha
		Out.col.a-=st.z;

		//second render target is written as zero
		Out.z = vec4(0.0); //compiler

		OUTPUT(Out); //return Out;
	})