Need help with geometry shader

Thanks, though that’s not what I had in mind. A little while ago I remembered uniform buffers, so I’m going to try something with those in the geometry shader, since that appears to be the only stage able to actually emit a variable number of vertices (barring whatever complicated methods compute shaders use; not in the mood to dive into those).

Welp, the shader compiles fine when I’m outputting just points, but as soon as I try to switch to triangles the compiler tells me it’s invalid. Mind taking a look?

make debug=1 run
...
cd bin && ./d-check_extra.elf
Attempting to open 'libd-extraglfw.so' & 'libd-extragl.so'
Creating program '[vfxapp.flat]'
Bound shader 'shaders/null.glsl' as point shader
source = GL_DEBUG_SOURCE_SHADER_COMPILER, defect = GL_DEBUG_TYPE_ERROR, weight = GL_DEBUG_SEVERITY_HIGH, seekid = 1 (GL_ID_UNKOWN)
report = 0:4(1): error: invalid geometry shader output primitive type
src/extra/viewfx/gl/opengl_shader.c:56: Error 0xFFFFFFFF (-1) EUNKNOWN
src/extra/viewfx/shader.c:213: Error 0xFFFFFFFF (-1) EUNKNOWN
src/extra/viewfx/vfxapp.c:349: Error 0xFFFFFFFF (-1) EUNKNOWN
src/extra/viewfx/vfxapp.c:115: In shader 'basic', path = 'shaders/flat.glsl'
0:4(1): error: invalid geometry shader output primitive type
src/extra/viewfx/vfxapp.c:128:
src/extra/viewfx/vfxapp.c:373: Error 0xFFFFFFFF (-1) EUNKNOWN
test/extra/launch.c:22: Error 0xFFFFFFFF (-1) EUNKNOWN
test/extra/main.c:108: Error 0xFFFFFFFF (-1) EUNKNOWN
Compilation finished successfully.

The code:

#version 440

layout(points) in;
layout(triangles, max_vertices=256) out;

const int INT_MAX = int(  ~0u >> 1 );
const int INT_MIN = int(~(~0u >> 1));
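
/* The constants below carve the uint range into turns: the emit code
   treats ~0u as one full revolution, so uqtr and uoct act as quarter
   and eighth turns, with dcap and dqtr as their double-precision spans. */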

const uint		uqtr = ~0u / 4;
const uint		uoct = ~0u / 8;
const double	dcap = double(~0u);
const double	dqtr = double(~0u / 4);

out		vec4	DyeColor;
out		vec4	DyeTexel;

uniform vec4	MidPoint;
uniform ivec2	WinLimit;
uniform vec2	WinScale;
uniform uint	VtxCount;
uniform bool	FlatLine;

vec4 edge_vertex( uint x, uint y )
{
	vec4 point;
	point.x = float(double(x) / dqtr);
	point.y = float(double(y) / dqtr);
	point.xy *= WinScale.xy;
	point.xy += MidPoint.xy;
	point.zw  = MidPoint.zw;
	return point;
}

void emit_vertex( vec4 point )
{
	gl_Position = point;
	DyeColor = vec4( 1.0, 1.0, 1.0, 1.0 );
	DyeTexel = vec4( 1.0, 1.0, 1.0, 1.0 );
	EmitVertex();
}

void emit_point( uint v, uint vertices )
{
	double aim = double(v);
	double max = double(vertices);
	uint rotate = uint((aim / max) * dcap);
	uint linear = rotate % uqtr;

	if ( FlatLine )
		emit_vertex( edge_vertex( linear, uqtr - linear ) );
	else
	{
		uint curved = linear + (linear / 3);
		if ( linear < uoct )
			emit_vertex( edge_vertex( curved, uqtr - linear ) );
		else
			emit_vertex( edge_vertex( linear, uqtr - curved ) );
	}
}

void main()
{
	uint v, vertices = VtxCount + uint(VtxCount == 0), stop;
	vec4 center = vec4( 0.0, 0.0, 0.0, 1.0 );

	vertices += (vertices % 2);
	stop = vertices - 1;

	for ( v = 0; v < stop; ++v )
	{
		emit_vertex( center );
		emit_point( v, vertices );
		emit_point( v + 1, vertices );
	}
}

The error seems pretty clear. Take a second look at the valid output primitive types.
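
For reference, a geometry shader may only declare points, line_strip, or triangle_strip as its output primitive type; “triangles” is valid only as an input qualifier. A minimal sketch of a declaration the compiler will accept:

/* outputs must be points, line_strip or triangle_strip;
   "triangles" and friends are input-layout qualifiers only */
layout(points) in;
layout(triangle_strip, max_vertices = 256) out;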

Ah, I didn’t realise there was yet another stupid restriction on geometry shaders. Did the designers just decide “let’s be assholes and make this thing that should be useful almost useless”?

I seem to have misunderstood how to use uniform buffers. Mind explaining where I’m going wrong here:

uniform vec4	RegPoint;
uniform vec4	RegScale;

The wrapper code:

linkVfxVar_cb libLinkVfxVarCB[VFXTYPE_COUNT] = {NULL};

dint libLinkNoSupportVfxVar( VFXCFG *vfxcfg, VFXVAR *vfxvar )
	{ (void)vfxcfg; (void)vfxvar; return ENOTSUP; }

dint libLinkSharedVfxVarf4v( VFXCFG *vfxcfg, VFXVAR *vfxvar )
{
	VFXBUF *vfxbuf = vfxvar->vfxbuf;
	BUFFER *buffer = vfxbuf->buffer;
	LIB_VFXVAR *var = vfxvar->libref;
	(void)vfxcfg;
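	/* note: the second argument is the number of vec4 elements being
	   uploaded; a count > 1 is only valid for a GLSL array uniform */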
	glUniform4fv( var->loc, buffer->count, buffer->array );
	return 0;
}

dint libLinkShaderVfxVar( VFXCFG *vfxcfg, VFXVAR *vfxvar )
{
	VFXBUF *vfxbuf = vfxvar->vfxbuf;
	LIB_VFXVAR *var = vfxvar->libref;
	VFXDEF const *def = SeekVfxDef( vfxbuf->type );
	GLenum const *type = def->libref;
	(void)vfxcfg;
	glVertexAttribPointer
	(
		var->loc,
		def->count,
		*type,
		GL_FALSE,
		def->bytes,
		NULL
	);
	return 0;
}

dint libLinkVfxVar( VFXCFG *vfxcfg, VFXVAR *vfxvar )
{
	VFXBUF *vfxbuf = vfxvar->vfxbuf;
	return vfxvar->shared
		? libLinkVfxVarCB[vfxbuf->type]( vfxcfg, vfxvar )
		: libLinkShaderVfxVar( vfxcfg, vfxvar );
}

I’m guessing I didn’t give enough info, so here’s the log at least; I’ll see what other info I can find that might be useful:

make debug=1 run
...
cd bin && ./d-check_extra.elf
Attempting to open 'libd-extraglfw.so' & 'libd-extragl.so'
Creating program '[vfxapp.flat]'
Bound shader 'shaders/null.glsl' as point shader
Bound shader 'shaders/flat.glsl' as basic shader
Bound shader 'shaders/frag.glsl' as color shader
Linking program...
Testing program...
source = GL_DEBUG_SOURCE_SHADER_COMPILER, fromid = 1 (GL_ID_UNKOWN), report = Shader Stats: SGPRS: 16 VGPRS: 24 Code Size: 136 LDS: 0 Scratch: 0 Max Waves: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 PrivMem VGPRs: 0
test/extra/create.c:27: 'WinLimit' not found
source = GL_DEBUG_SOURCE_SHADER_COMPILER, fromid = 1 (GL_ID_UNKOWN), report = Shader Stats: SGPRS: 16 VGPRS: 4 Code Size: 4 LDS: 0 Scratch: 0 Max Waves: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 PrivMem VGPRs: 0
source = GL_DEBUG_SOURCE_API, defect = GL_DEBUG_TYPE_ERROR, weight = GL_DEBUG_SEVERITY_HIGH, fromid = 2 (GL_ID_UNKOWN)
report = GL_INVALID_OPERATION in glUniform(program not linked)
source = GL_DEBUG_SOURCE_SHADER_COMPILER, fromid = 1 (GL_ID_UNKOWN), report = Shader Stats: SGPRS: 56 VGPRS: 12 Code Size: 2392 LDS: 0 Scratch: 0 Max Waves: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 PrivMem VGPRs: 0
source = GL_DEBUG_SOURCE_SHADER_COMPILER, fromid = 1 (GL_ID_UNKOWN), report = Shader Stats: SGPRS: 24 VGPRS: 8 Code Size: 152 LDS: 0 Scratch: 0 Max Waves: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 PrivMem VGPRs: 0
Passing width=480, height=640 to shaders...failed
Passing scalex=1.000000, scaley=0.750000 to shaders...success
source = GL_DEBUG_SOURCE_API, defect = GL_DEBUG_TYPE_ERROR, weight = GL_DEBUG_SEVERITY_HIGH, fromid = 2 (GL_ID_UNKOWN)
report = GL_INVALID_OPERATION in glUniform(count = 3 for non-array "RegPoint"@0)
source = GL_DEBUG_SOURCE_API, defect = GL_DEBUG_TYPE_ERROR, weight = GL_DEBUG_SEVERITY_HIGH, fromid = 2 (GL_ID_UNKOWN)
report = GL_INVALID_OPERATION in glDrawArrays
...
source = GL_DEBUG_SOURCE_API, defect = GL_DEBUG_TYPE_ERROR, weight = GL_DEBUG_SEVERITY_HIGH, fromid = 2 (GL_ID_UNKOWN)
report = GL_INVALID_OPERATION in glUniform(count = 3 for non-array "RegPoint"@0)
source = GL_DEBUG_SOURCE_API, defect = GL_DEBUG_TYPE_ERROR, weight = GL_DEBUG_SEVERITY_HIGH, fromid = 2 (GL_ID_UNKOWN)
report = GL_INVALID_OPERATION in glDrawArrays
Compilation finished successfully.

Edit: Putting aside some initialisation code that amounts to thin thread-safe wrappers over the OpenGL calls, here’s what I initialise the two mentioned uniforms with:

	BUFFER *regpointsBuffer = NULL, *regscalesBuffer = NULL, **buffer = NULL;
	VFXBUF *regpointsVfxBuf = NULL, *regscalesVfxBuf = NULL, **vfxbuf = NULL;
	VFXVAR *regpointsVfxVar = NULL, *regscalesVfxVar = NULL, **vfxvar = NULL;
	VFXCFG *vfxcfg = NULL;
	vec4s regpoints[] =
	{
		{{0.25,0.25,1.0,1}},
		{{0.25,0.75,1.0,1}},
		{{0.5, 0.5, 1.0,1}}
	};
	uint w = 1;
	vec4s regscales[sizeof(regpoints)/sizeof(vec4s)] =
	{
		{{ w, w, w, 1.0 }},
		{{ w, w, w, 1.0 }},
		{{ w, w, w, 1.0 }}
	};

Yes, such a stupid restriction: allowing you to output triangle strips instead of just triangles. That forces you to use EndPrimitive if you want to output individual triangles instead of one long strip. Which is such an onerous burden.
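
A minimal sketch of that pattern: declare triangle_strip output, then cut the strip after every third vertex so each triplet becomes its own triangle:

layout(points) in;
layout(triangle_strip, max_vertices = 256) out;

void emit_triangle( vec4 a, vec4 b, vec4 c )
{
	gl_Position = a; EmitVertex();
	gl_Position = b; EmitVertex();
	gl_Position = c; EmitVertex();
	EndPrimitive(); /* ends this strip; the next EmitVertex() starts a new one */
}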

Neither of those are uniform buffers. They’re just non-block uniforms.

Also, neither of them is an array, so I don’t see why your C code uses arrays for these values.
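
For contrast, a minimal sketch of the two forms (the names are illustrative): a non-block uniform is set with glUniform*, while a uniform block draws its storage from a buffer object:

/* non-block uniform: lives in the default uniform block and is set
   with glUniform4fv( loc, 1, data ); no buffer object is involved */
uniform vec4 RegPoint;

/* uniform block: its storage comes from a UBO attached to the block's
   binding point via glBindBufferBase or glBindBufferRange */
uniform RegBlock
{
	vec4 Point;
	vec4 Scale;
} reg;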

There’s a difference between “allowing” and enforcing.

They’re supposed to be fed by arrays/buffers, since for some strange reason the devs decided no one would ever want to pass individual vertices instead of entire bloody buffers at once. It was the only thing I could think of to work around the silly whole-buffers-only restriction.

Weren’t you complaining earlier about how GS’s aren’t flexible enough? Yet by supporting triangle strips and EndPrimitive(), GS’s can output a single triangle, multiple individual triangles, or any number of triangle strips of any length, all without changing the actual output primitive.

So which is it: are they too constricted or too flexible?

That’s beside the point. When the output is declared as triangle_strip, the GPU/gfx card AND future developers will expect likewise; “triangles” and similar should’ve been output options to avoid that confusion. Case in point: I didn’t realise that until your first mention of EndPrimitive().

Look, you’re basically blaming the specification authors because you didn’t read the documentation. Valid output types are points, line_strip and triangle_strip.

There’s nothing to be gained by adding additional types (triangle fans and line loops would impose an additional burden on the implementation, as they don’t follow the sliding-window idiom of the supported types).


How so? All it would be using is pre-existing modes. Correct me if I’m wrong, but the vertex shader comes before the geometry shader, right? And that only deals with points; everything thereafter just honours whatever mode was sent to it. If a geometry shader is present, it should just ignore the given mode unless that mode can be easily constructed from the geometry output (for example points to lines & triangles, but not triangles or lines to points); the alternative to ignoring it is to both ignore it and spit out an error when it’s incompatible. There’s no reason to insist that only 3 modes be usable. Another example: say the geometry shader was set to triangle_fan while the expected output is GL_TRIANGLE_STRIP; the limits of the fan can then be determined by taking whatever vertices are furthest from the center (which is easy to determine, given it will always be the same for each triangle) and just using them to set the bounds of the expected triangle-strip part. It’s not difficult. Slower, sure, but that’s on the dev for not switching to a geometry shader whose output can be used as direct vertices instead of indirect.

I’ve done plenty of cleanup, both to make it easier to understand what is being created and to ensure all uniforms show up. At least, that was the intent; I’m still having issues on that front.

Here’s what I have in the shader/s:

struct FNUMS
{
	vec3	WinScale;
	vec3	RegScale;
	vec3	RegPoint;
	vec3	RegEmits;
	vec3	RegTakes;
};

struct DINTS
{
	ivec3	WinSpace;
	ivec2	WinPoint;
	int		VtxCount;
	int		FlatLine;
};

uniform DINTS dints;
uniform FNUMS fnums;

Here’s my creation process:

dint create( SRC *src, APP *app )
{
	VFXAPP *vfxapp = app->vfxapp;
#if 0
	SHADER *shader = SeekVfxAppShader( vfxapp, SHADER_TYPE_POINT );
#endif
	BUFFER *buffer = NULL;
	VFXBUF *vfxbuf = NULL;
	VFXVAR *vfxvar = NULL;
	VFXCFG *vfxcfg = NULL;
	vec3s one3 = {{1,1,1}};
	SHARED_DINTS dints;
	SHARED_FNUMS fnums;
	VFXTYPE SharedBufferTypes[SHARED_BUFFER_COUNT] = {0};
	ucap SharedBufferVsizes[SHARED_BUFFER_COUNT] = {0};
	uint SharedBufferCounts[SHARED_BUFFER_COUNT] = {0};
	achs SharedBufferNames[SHARED_BUFFER_COUNT] = {NULL};
	void *SharedBufferDefaults[SHARED_BUFFER_COUNT] = {NULL};
	dint i, err;

	memset( dints.raw, 0, sizeof(dints) );
	memset( fnums.raw, 0, sizeof(fnums) );

	dints.mem.WinSpace.x = 480;
	dints.mem.WinSpace.y = 640;
	/* A depth of 10,000 should be enough for normal game play */
	dints.mem.WinSpace.z = 10000;

	dints.mem.VtxCount = 3;
	dints.mem.FlatLine = 1;

	fnums.mem.WinScale = one3;
	fnums.mem.WinScale.y = 480.0 / 640.0;

	fnums.mem.RegScale = one3;
	fnums.mem.RegEmits = one3;

	SharedBufferNames[SHARED_BUFFER_UINTS] = "uints";
	SharedBufferNames[SHARED_BUFFER_DINTS] = "dints";
	SharedBufferNames[SHARED_BUFFER_FNUMS] = "fnums";
	SharedBufferNames[SHARED_BUFFER_DNUMS] = "dnums";

	SharedBufferVsizes[SHARED_BUFFER_UINTS] = sizeof(uint);
	SharedBufferVsizes[SHARED_BUFFER_DINTS] = sizeof(dint);
	SharedBufferVsizes[SHARED_BUFFER_FNUMS] = sizeof(fnum);
	SharedBufferVsizes[SHARED_BUFFER_DNUMS] = sizeof(dnum);

	SharedBufferTypes[SHARED_BUFFER_UINTS] = VFXTYPE_UINT;
	SharedBufferTypes[SHARED_BUFFER_DINTS] = VFXTYPE_DINT;
	SharedBufferTypes[SHARED_BUFFER_FNUMS] = VFXTYPE_FLOAT;
	SharedBufferTypes[SHARED_BUFFER_DNUMS] = VFXTYPE_DOUBLE;

	/*SharedBufferDefaults[SHARED_BUFFER_UINTS] = uints.raw;*/
	SharedBufferDefaults[SHARED_BUFFER_DINTS] = dints.raw;
	SharedBufferDefaults[SHARED_BUFFER_FNUMS] = fnums.raw;
	/*SharedBufferDefaults[SHARED_BUFFER_DNUMS] = dnums.raw;*/

	/*SharedBufferCounts[SHARED_BUFFER_UINTS] = sizeof(uints) / sizeof(uint);*/
	SharedBufferCounts[SHARED_BUFFER_DINTS] = sizeof(dints) / sizeof(dint);
	SharedBufferCounts[SHARED_BUFFER_FNUMS] = sizeof(fnums) / sizeof(fnum);
	/*SharedBufferCounts[SHARED_BUFFER_DNUMS] = sizeof(dnums) / sizeof(dnum);*/

	for ( i = 0; i < SHARED_BUFFER_COUNT; ++i )
	{
		err = MakeBuffer
			( src, &(app->SharedBuffers[i]), SharedBufferVsizes[i] );
		if ( err )
		{
			PRINT_ERRNO( stdout, err );
			goto void_vectors;
		}

		buffer = app->SharedBuffers[i];
		err = InitBuffer
			( src, buffer, SharedBufferDefaults[i], SharedBufferCounts[i] );

		if ( err )
		{
			PRINT_ERRNO( stdout, err );
			goto void_vectors;
		}

		err = MakeVfxBuf( src, &(app->SharedVfxBufs[i]) );
		if ( err )
		{
			PRINT_ERRNO( stdout, err );
			goto void_vectors;
		}

		vfxbuf = app->SharedVfxBufs[i];
		err = MakeVfxVar( src, &(app->SharedVfxVars[i]) );
		if ( err )
		{
			PRINT_ERRNO( stdout, err );
			goto void_vectors;
		}

		vfxvar = app->SharedVfxVars[i];
		err = InitVfxVar( src, vfxvar, vfxbuf, SharedBufferNames[i] );
		if ( err )
		{
			PRINT_ERRNO( stdout, err );
			goto void_vectors;
		}

		err = FindVfxVar( src, vfxvar, vfxapp, NULL );
		if ( err )
		{
			/* Not fatal as it is an optional value */
			PRINT_LINE( stdout );
			printf("'%s' not found\n", SharedBufferNames[i] );
		}
	}
	...

Here’s the test “draw” loop:

	while ( !DeadVfxWin( win ) )
	{
		ucap i;
		SHARED_DINTS *dints =
			SeekBufferArray( app->SharedBuffers[SHARED_BUFFER_DINTS] );
		SHARED_FNUMS *fnums =
			SeekBufferArray( app->SharedBuffers[SHARED_BUFFER_FNUMS] );
		dint vertices[] = { 3, 3, 3 };
		vec3 points[sizeof(vertices)/sizeof(dint)] =
			{ { 0.25, 0.25, 0 }, { 0.75, 0.25, 0 }, { 0.5, 0.75, 0 } };
		BindVfxApp( src, vfxapp );
		fillVfxBoxCB( 0.0, 0.0, 0.0, 1.0 );
		zeroVfxBitCB( VFX_COLOR_BIT | VFX_DEPTH_BIT );

		if ( SeekKeyAct( win, KEY_ANSI_ESC ) == KEY_ISACTIVE )
		{
			KillVfxWin( src, win );
			break;
		}

		err = SeekVfxWinSize( win, &winH, &winW );

		if ( err )
			PRINT_ERRNO( stdout, err );

		err = SeekVfxWinPos( win, &winX, &winY );

		if ( err )
			PRINT_ERRNO( stdout, err );

		dints->mem.WinSpace.x = winW;
		dints->mem.WinSpace.y = winH;
		dints->mem.WinPoint.x = winX;
		dints->mem.WinPoint.y = winY;

		for ( i = 0; i < (sizeof(points) / sizeof(vec3)); ++i )
		{
			dints->mem.VtxCount = vertices[i];
			memcpy( fnums->mem.RegPoint.raw, points[i], sizeof(vec3) );
			DrawVfxCfg( src, vfxcfg, VFXDRAW_TRIOS );
		}

		SwapVfxWinBufs( src, win );
		PollWapi();
	}

I understand why uints & dnums are not found (since I have no use for them yet), but I don’t understand why dints & fnums are not found even though they are definitely used. I’m not yet expecting their output to match what I’m “feeding” them, because I haven’t done the wrapper for their uniform types (float* & int*), but I do expect them to be locatable. Why isn’t that the case?

I did some more reading and edited my wrapper for finding shader symbols, but still no luck:

dint libFindVfxVar( VFXVAR *vfxvar, VFXAPP *vfxapp, SHADER *shader )
{
	LIB_VFXAPP *app = vfxapp->libref;
	LIB_VFXVAR *var = vfxvar->libref;
	achs name = SeekAchsArray( vfxvar->name );

	if ( shader )
		var->loc = glGetAttribLocation( app->id, name );
	else if ( vfxvar->vfxbuf )
		var->loc = glGetUniformBlockIndex( app->id, name );
	else
		var->loc = glGetUniformLocation( app->id, name );

	vfxvar->used = ((var->loc) != (GLuint)-1);

	return -(!(vfxvar->used));
}
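
For what it’s worth, those three queries resolve different kinds of names, which matters for the struct-style uniforms above: a struct-typed uniform is only locatable per member, never by its aggregate name, and glGetUniformBlockIndex resolves only named interface blocks. A minimal sketch, assuming the GL 3.1+ prototypes are loaded and prog is a linked program id (hypothetical names):

#include <stdio.h>
#include <GL/gl.h>

void check_names( GLuint prog )
{
	/* a struct uniform's members are the active uniforms, so "fnums"
	   alone has no location; its members do */
	GLint scale = glGetUniformLocation( prog, "fnums.WinScale" );

	/* only interface blocks (uniform FNUMS { ... } fnums;) have a
	   block index; plain and struct uniforms yield GL_INVALID_INDEX */
	GLuint block = glGetUniformBlockIndex( prog, "FNUMS" );

	if ( scale == -1 || block == GL_INVALID_INDEX )
		printf( "name not active (or optimised out) in this program\n" );
}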

All of your code is written against your own APIs, so it would be pretty much impossible for anyone else to be able to debug it. Have you considered using an OpenGL logging utility and looking at the sequence of OpenGL calls your program makes?


What does “expect likewise” mean here?

Because EndPrimitive() exists, the GPU cannot be unaware of the idea that a GS which outputs triangle strips can output multiple strips, and that it could in fact output one strip per triangle. Therefore, the GPU must “expect” that a triangle strip GS will output multiple strips.

In order for a GPU to make a GS work, there has to be some buffering between the primitives output by a GS and the rasterizer. There is a limitation on the amount of stuff you can stick into that buffer, beyond which the GPU can’t handle what you’re doing.

But this limitation is primarily about the amount of vertex data you write. If you write 12 vertices, you’ve written 12 vertices whether you wrote them as a single strip or as 4 individual triangles. As evidence for this, OpenGL has limitations on the total number of vertices a GS can output and the total size of the data of those vertices. But it doesn’t have a limit on the amount of primitives. So as far as OpenGL limitations are concerned, 12 vertices interpreted as a single strip or as 4 individual triangles take up the same space.
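
Both limits are queryable at runtime; a minimal sketch, assuming a current GL 3.2+ context:

#include <stdio.h>
#include <GL/gl.h>

void print_gs_limits( void )
{
	GLint max_vertices = 0, max_components = 0;
	/* hard cap on EmitVertex() calls per GS invocation (at least 256) */
	glGetIntegerv( GL_MAX_GEOMETRY_OUTPUT_VERTICES, &max_vertices );
	/* cap on vertices * components written per invocation (at least 1024) */
	glGetIntegerv( GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS, &max_components );
	printf( "GS limits: %d vertices, %d total components\n",
		max_vertices, max_components );
}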

So if the arrangement of primitives was in fact a hardware concern, OpenGL’s API does not seem to address it. As such, we have no reason to assume that there would be any hardware benefit to knowing that a GS only outputs triangle lists instead of strips which could be individual triangles.

So unless you have specific knowledge of the actual implementation of GSs on some particular hardware, your concern here is entirely speculative.

No, vertex shaders only deal with vertices. They don’t deal with primitives of any kind.

None of what you said here makes sense. There is no “center” of a primitive. A “triangle fan” is not literal in the sense of the shape of a fan. It is a primitive where a sequence of vertices is converted into a sequence of triangles by making a triangle starting with the first vertex in the sequence and connecting it to the next two. A fan takes the vertex sequence ABCDEFG and creates the triangles (ABC)(ACD)(ADE)(AEF)(AFG).

It doesn’t know anything about the shape of the triangle, a center point, or anything of the kind. It’s simply a means to convert X vertices into X-2 triangles.
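
Since the conversion is purely topological, it can be written without ever consulting a coordinate; a minimal sketch of fan-to-list expansion:

/* expand a fan over vertex indices 0..n-1 into n-2 independent
   triangles: (0,1,2), (0,2,3), (0,3,4), ... only order matters */
void fan_to_triangles( unsigned n, void (*emit)( unsigned, unsigned, unsigned ) )
{
	unsigned i;
	for ( i = 1; i + 1 < n; ++i )
		emit( 0, i, i + 1 );
}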

Overall, your problem seems to be that you have an intuitive sense about how graphics is supposed to work. But how graphics actually works doesn’t match your intuition. And you’re unwilling to let go of your intuition; you want reality to match the way you think it ought to work.

That kind of thinking will only poison your ability to actually use the tools given to you. So you should stop doing that.

Well, here’s the code. I’m not looking into utilities yet because I’m already aware of what’s being called; I DID make the wrapper API, after all. The API just amounts to thin wrappers for the most part; the only things that do anything extra are mainly general stuff like storing names and pointers (done prior to the OpenGL calls to make debugging, and also cleanup, easier).

You just answered your own question there, but in case you don’t see it: “likewise” here means that even devs new to the shader code/file will see triangle_strip/line_strip and expect just those output primitives, despite the output being just disjoint lines or triangles. The most I can do to circumvent that is something like:

#define triangles triangle_strip
...
layout( triangles, max_vertices = # )

That’s only the case if all vertices are handled at once. It’s perfectly possible to use a thread pool, or just pass a primitive off to another core straight after it’s been constructed (like if & when EndPrimitive() is called); OpenGL should not be assuming that such situations will never happen. It’s fine to assume that it’s not supported by the graphics card, but it should still not be enforcing limits on the number of vertices; it should let the GPU report the error. The GPU is perfectly capable of doing so and of ignoring any further output from the geometry shader. It is the developer’s job to query the limitations of the GPU and determine what limits, if any, to place on his/her shaders.

points & vertices are the same thing

But there IS a center point for triangle fans; there is ALWAYS a center point. You generate circles with fans, you generate semi-circles with fans, you generate quarter circles with fans: the center is always the one point that every single resulting triangle in the fan connects to. For example, say a circle being generated has just 5 vertices including the center vertex. The center in a normalised scope will be [0,0], and the edge 4 would be something like [0,1],[1,0],[0,-1],[-1,0]. Each of the edge 4 would be used to create a triangle that contains the center point and 1 other of the edge 4. Even in the scenario where the triangles are made to point outward, there is still going to be a center point. Since the GPU knows how many triangles are being used to create the fan, it just needs to find the center after finding the furthest x,y coordinates; that’s easy enough to do, as the coords can be checked in the EmitVertex() call.

You may be right there; however, I want to see how far I can go in forcing the geometry shader to behave the expected way before I give up entirely. The vertex limit is annoying, but it’s not a deal breaker as far as the algorithm goes, just a hard cap on the number of expected vertices; it doesn’t stop the calls from occurring, it will just likely cause the GPU to output an error and ignore any further output. That’s not great, but it’s still acceptable. I’m merely griping over the intentional limits that have no place in the graphics pipeline. Most of the time I’ll be using the geometry shader to produce quadrilaterals for texturing anyway; the point is that if I wanted a circle with, say, 2049 vertices (including the center), then the only thing that should stop me from generating it in the geometry shader is a poor hardware implementation, not the shader language.

How “thin” it is is a matter of debate. To me, a wrapper is “thin” if you can easily see how it corresponds to the API it wraps. Yours isn’t that.

For example, what does MakeVfxVar do? I can see that it takes a src, which I might guess is some kind of shader. But how do you “make” a variable from a shader?

Unless said cores are all busy processing primitives generated by a previous GS invocation. That is, there’s no reason to assume that GS invocations cannot generate primitives faster than the rasterizer can process them. In which case, buffering is needed.

Plus, you never want the rasterizer to stop. When it goes looking for more data, it should be able to find some if the vertex processing stages can keep up with it. For GS’s, that requires buffering, since each GS invocation can generate a lot of primitive data.

No, they are not. A vertex is just a vertex. A point is a primitive that is composed of a vertex.

And you can generate rectangles and squares with fans:

  A-----B
  | \   |
  |  \  |
  |   \ |
  |    \|
  D-----C

The triangle fan primitive ABCD draws exactly like this. Vertex A is the first vertex, so the fan creates triangles with it and pairs of other vertices: ABC and ACD. At no point does the triangle fan algorithm know or care where any of these vertices are in space. None of these vertices are geometrically at the “center” of the polygon. Indeed, you can form any polygon with a fan like this.

Primitives do not care about the geometric position of any of their vertices. They’re dealing with the topology of a set of vertices, which is independent of their geometric position in space.

My problem is that you make these declarative statements based purely on how you want things to work, without any regard to how GPUs actually work.

I get that you want to just read a bunch of arbitrary data, process it, and shove a bunch of arbitrary data down the GPU’s throat. But the existing model exists because that model prioritizes parallelism. VS’s operate on a 1:1 basis with vertices because that allows each invocation to operate on a distinct dataset in parallel with others. GS’s operate on a fixed primitive of input data because that allows for better parallelism than if they could just read an arbitrary number of vertices in the stream. Etc.

Those limitations have a “place in the graphics pipeline” because they allow hardware to be efficient. Stop assuming that you know better than the makers of GPUs.

Again, NVIDIA’s mesh shaders work much more like what you want, but those are an NVIDIA-only hardware thing. Which goes back to my point: GPUs by and large are not designed to work this way, and they require specialized hardware to do so efficiently.

Well, assuming you took a quick glance at the URL I gave to where I’m uploading my code, you only had to do a quick search for SRC to find it’s just an abstract type for top thread objects to inherit, so that non-thread-safe actions can be taken without knowing anything about the thread object the developer is using. (For example, I’m using the THREAD structure from basic/thread.h, but a developer is perfectly free to ignore that and make their own; as long as the object they make can provide a properly initialised SRC, none of the reliant APIs need to know anything about the structure they’re using.)

You could also have noted the SHADER object encased in an #if 0 at the top of one of my code snippets to realise that, no, SRC has nothing to do with shaders, although it is perfectly possible for a dev to supply their own that references one (it’d be a bit redundant, but dev’s choice and all).

That still boils down to hardware implementation, and that’s not what I’m griping about. I’m griping about GLSL effectively saying “I don’t care if the hardware might support it, I still won’t allow you to do this”. The ONLY refusal should be coming from the hardware, not the shader language. An example exchange should look like this:

GLSL: I'll create the instructions, as for what the hardware says...
GPU: No, I'm not built for this

or

GLSL: I'll create the instructions, as for what the hardware says...
GPU: Yep, I can do this

That’s it. GLSL should never refuse to even try.

You’ll eventually realise you’re confusing points with particles, but for now let’s just agree to disagree on this subject.

Just because that CAN be done doesn’t mean it’s the generally understood meaning of a fan. The generally understood meaning is “a fan of triangles produces a curve”, which to the GPU would mean “expect one point in every triangle of the fan to be the same”. On just 2 triangles it will only have candidates; on the 3rd triangle it will have enough data to see whether it actually is a fan or should be treated as strips or just disjoint triangles. Whatever the case, it will have enough data to decide how to convert to another triangle type (if anything needs to be done at all).

No, I’m just expecting a compiler, even if it’s for shaders, to behave as JUST a compiler. It has no place refusing to support something when it is perfectly possible; if the hardware doesn’t support it, then that’s a runtime issue for the developer to solve, nothing to do with the compiler or the language it’s compiling.

For example, the compiler could query the hardware prior to compiling to see what it supports, then provide a bunch of preprocessor defines for code to check for and adapt to. There is no reason the compiler should outright refuse to support something in the language it’s compiling; that was never the purpose of a compiler. Hardware limitations are solely for the developer to deal with; compilers just need to report what they can while they’re compiling.
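
For what it’s worth, GLSL already does a limited form of this for extensions: every extension the compiler supports defines a preprocessor macro of the same name, so shaders can adapt. A minimal sketch:

#version 440
#extension GL_ARB_gpu_shader_fp64 : enable

#ifdef GL_ARB_gpu_shader_fp64
	/* double-precision path */
#else
	/* float fallback */
#endif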

Again you refer to an unrelated subject. I’m not complaining about GPU limits; I’m complaining about a programming language enforcing limits. Here’s a different way of putting it: stdout surely has a limit, be it by hardware or software limitation, yet we can write as many calls to puts etc. as we want. What if compilers were designed to say “no, you can only access stdout X times, then I won’t add any more of your calls”? Don’t you think there would be an uproar? That’s what I’m getting at here. Yes, the GPU might ignore or refuse further calls to EmitVertex(), but that is a RUNTIME/HARDWARE issue; it was never something that GLSL needed to care about. The GPU can and likely does have a default size for the related buffers, but that is a hardware implementation issue, not a shader language issue. Forcefully making it a shader language issue only adds an unnecessary ball & chain to developers’ workflow. It’s the developer’s job to detect what implementation is in use and decide what to do with it, not the shader language’s.

I tried to change how I declare my uniforms in the shader to match exactly the style of the document I’m referencing, but now the shader won’t compile:

shaders/flat.glsl:39(19): error: syntax error, unexpected DOT_TOK, expecting '('
uniform struct fnums
{
	vec3	WinScale;
	vec3	RegScale;
	vec3	RegPoint;
	vec3	RegEmits;
	vec3	RegTakes;
};

uniform struct dints
{
	ivec3	WinSpace;
	ivec2	WinPoint;
	int		VtxCount;
	int		FlatLine;
};

vec4 edge_vertex( uint x, uint y )
{
	vec4 point;
	point.x = float(double(x) / dqtr);
	point.y = float(double(y) / dqtr);
	point.xy *= fnums.WinScale.xy * fnums.RegScale.xy;
	point.xy += fnums.RegPoint.xy;
	point.z   = fnums.RegPoint.z;
	point.w	  = 1.0;
	return point;
}

According to my text editor the line & character being referenced is this one: point.xy *= fnu

Any ideas?
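
One likely reading of that error: “uniform struct fnums { ... };” declares only a struct type named fnums, with no uniform instance, so the later “fnums.WinScale” parses fnums as a type name (hence the parser expecting “(”, as in a constructor call). A sketch of a declaration that also creates the instance, assuming that is the intent:

uniform struct FNUMS /* the type */
{
	vec3	WinScale;
	/* ...remaining members as above... */
} fnums; /* the instance; members are then located as "fnums.WinScale" */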

Edit: Also this is the document I’m referencing:

https://www.geeks3d.com/20140704/gpu-buffers-introduction-to-opengl-3-1-uniform-buffers-objects/

I opted not to use layout(std140) because I want this to be usable on the earliest GLSL variant possible at a later date, and being too reliant on features from #version 440 would make it annoying to fix later.

Don’t think the page has updated yet, but anyway: I managed to locate my uniform “buffers” after finally finding something useful via Google (it was on Stack Overflow, as usual). Now my shader code looks like this:

uniform FNUMS
{
	vec3	WinScale;
	vec3	RegScale;
	vec3	RegPoint;
	vec3	RegEmits;
	vec3	RegTakes;
} fnums;

uniform DINTS
{
	ivec3	WinSpace;
	ivec2	WinPoint;
	int		VtxCount;
	int		FlatLine;
} dints;

vec4 edge_vertex( uint x, uint y )
{
	vec4 point;
	point.x = float(double(x) / dqtr);
	point.y = float(double(y) / dqtr);
	point.xy *= fnums.WinScale.xy * fnums.RegScale.xy;
	point.xy += fnums.RegPoint.xy;
	point.z   = fnums.RegPoint.z;
	point.w	  = 1.0;
	return point;
}

And I have to search for FNUMS & DINTS instead of the expected fnums & dints. Not at all intuitive.
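
That behaviour follows from these now being interface blocks: glGetUniformBlockIndex takes the block name (FNUMS), while the instance name (fnums) merely scopes the members inside the shader, and the block is fed through a buffer binding point rather than through glUniform*. A minimal sketch, assuming prog is a linked program and ubo a buffer already filled with correctly laid-out data (hypothetical names):

#include <GL/gl.h>

void bind_fnums( GLuint prog, GLuint ubo )
{
	/* look up by block name, not instance name */
	GLuint index = glGetUniformBlockIndex( prog, "FNUMS" );

	/* route the block through binding point 0... */
	glUniformBlockBinding( prog, index, 0 );

	/* ...and attach the buffer to that same binding point */
	glBindBufferBase( GL_UNIFORM_BUFFER, 0, ubo );
}

Note that without layout(std140) the members use the implementation-defined “shared” layout, so the byte offsets should be queried (glGetActiveUniformsiv with GL_UNIFORM_OFFSET) before filling the buffer.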