Hi oglers
Slightly OT? I'm trying to get a couple of octaves of Perlin-esque noise out of a vertex shader with room to spare for some other calcs.
The smallest 3D noise shader I've seen is the vnoise effect from the Nvidia SDK/Effects browser, which is 127 instructions for 1 octave of 3D noise. It saves instructions by doing a 32-element array lookup, and it doesn't use an 'ease' function to interpolate between lattice values.
The one I've come up with is 66 instructions. It uses a hashing function to determine the lattice noise values (rather than a lookup) and does a nice ease between them for smooth noise.
the basic hash function is:
n = x + y*57.0 + z*17.0;
n = (n*(n*n*15731.0+789221.0)+1376312589.0);
n = n / 65535.0;
n = n - floor(n);
n = n * 2.0 - 1.0;
The vertex shader follows, so any comments/ideas on making it smaller, please! It's a new version that I haven't tested yet, so there may be some typos.
!!VP1.0
# Vertex program procedural noise
# Faked Perlin-style 3D noise:
# repeatable pseudo-random numbers for a given vertex position.
# Hashes the 8 corners of the lattice cell containing the scaled vertex
# position and blends them with an eased lerp along x, then y, then z.
# The result displaces the vertex along its normal and is written as colour.
# v1.1
# 64 instructions
# Rob James 2002
# pocketmoon@ntlworld.com
#
# Constant registers read by this program:
#   c[0]..c[3]  rows of the matrix applied to the displaced position
#   c[12].w     copied to the w of the displaced position
#               (presumably 1.0 - confirm against the application)
#   c[13]       freq amp 3.0 1.0
#   c[16]       1 2 3 4
#   c[18]       1 57 17 0
#   c[19]       15731.0 789221.0 1376312589.0
#   c[20].x     scale applied to the hash; the formula divides by 65535,
#               so presumably 1.0/65535.0 - confirm against the application
#
# Register roles:
#   R0   scaled position, then its floor, walked around the lattice cell
#   R10  fractional part of the scaled position (t)
#   R11  eased blend weights sx sy sz
#   R3   values for the four corners at x,   laid out as
#        .x=(y,z)  .y=(y+1,z)  .z=(y,z+1)  .w=(y+1,z+1)
#   R4   values for the four corners at x+1, same layout
#   R1, R2  scratch

# scale up the vector: R0 = position * freq + c[13].w
MAD R0, v[OPOS], c[13].x, c[13].w;

# calc the fractional parts and store in R10
# (EXP returns the fraction of its scalar source in the .y result only,
#  so x and z go through R3.y as a scratch)
EXP R3.y, R0.x;
MOV R10.x, R3.y;
EXP R10.y, R0.y;
EXP R3.y, R0.z;
MOV R10.z, R3.y;

# floor(R0) = R0 - frac(R0)
ADD R0, R0, -R10;

# calc the eased weights sx sy sz = t*t*(3 - 2*t) and stick in R11
MAD R3, R10, -c[16].yyyy, c[16].zzzz;
MUL R4, R3, R10;
MUL R11, R4, R10;

# now hash the eight lattice corners into R3 (x) and R4 (x+1) using
#   n = x + 57 * y + 17 * z
# R0 is stepped one axis at a time so each corner costs one ADD + one DP3.
DP3 R3.x, R0, c[18];            # (x  , y  , z  )
ADD R0.y, R0.y, c[18].x;
DP3 R3.y, R0, c[18];            # (x  , y+1, z  )
ADD R0.z, R0.z, c[18].x;
DP3 R3.w, R0, c[18];            # (x  , y+1, z+1)
ADD R0.y, R0.y, -c[18].x;
DP3 R3.z, R0, c[18];            # (x  , y  , z+1)
# step to the x+1 face and walk back the same way so R4 mirrors R3's layout
ADD R0.x, R0.x, c[18].x;
DP3 R4.z, R0, c[18];            # (x+1, y  , z+1)
ADD R0.y, R0.y, c[18].x;
DP3 R4.w, R0, c[18];            # (x+1, y+1, z+1)
ADD R0.z, R0.z, -c[18].x;
DP3 R4.y, R0, c[18];            # (x+1, y+1, z  )
ADD R0.y, R0.y, -c[18].x;
DP3 R4.x, R0, c[18];            # (x+1, y  , z  )

# now do the mashing!
#   n = (n*(n*n*15731)+789221)+1376312589
#   n = n / 65535   (done as a multiply by c[20].x)
# dont ask how I came by those numbers!
MUL R1, R3, R3;
MAD R2, R1, c[19].xxxx, c[19].yyyy;
MAD R1, R3, R2, c[19].zzzz;
MUL R3, R1, c[20].xxxx;

MUL R1, R4, R4;
MAD R2, R1, c[19].xxxx, c[19].yyyy;
MAD R1, R4, R2, c[19].zzzz;
MUL R4, R1, c[20].xxxx;

# keep fractional part of every component
# (.y can be written directly; the others go through R1.y)
EXP R1.y, R3.x;
MOV R3.x, R1.y;
EXP R3.y, R3.y;
EXP R1.y, R3.z;
MOV R3.z, R1.y;
EXP R1.y, R3.w;
MOV R3.w, R1.y;

EXP R1.y, R4.x;
MOV R4.x, R1.y;
EXP R4.y, R4.y;
EXP R1.y, R4.z;
MOV R4.z, R1.y;
EXP R1.y, R4.w;
MOV R4.w, R1.y;

# multiply by 2 and subtract 1
# this gives a range -1 to 1
MAD R1, R3, c[16].yyyy, -c[16].xxxx;
MAD R2, R4, c[16].yyyy, -c[16].xxxx;

# a = u + t * (v - u)
# blend along x: four lerps at once, x face (R1) towards x+1 face (R2)
ADD R4, R2, -R1;
MAD R3, R4, R11.xxxx, R1;

# blend along y: only .x (row z) and .z (row z+1) of R2 are meaningful
ADD R1, R3.ywww, -R3;
MAD R2, R1, R11.yyyy, R3;

# one final lerp along z: row z (R2.x) towards row z+1 (R2.z); result in R3.x
MOV R3.x, R2.z;
ADD R1, R3, -R2;
MAD R3, R1, R11.zzzz, R2;

# displace the vertex along its normal by noise * amp
MOV R8.w, c[12].w;
MUL R8.xyz, v[NRML], R3.x;
MAD R8.xyz, R8, c[13].y, v[OPOS];

# colour = abs(R3)
MAX o[COL0], R3, -R3;

# transform the displaced position
DP4 o[HPOS].x, c[0], R8;
DP4 o[HPOS].y, c[1], R8;
DP4 o[HPOS].z, c[2], R8;
DP4 o[HPOS].w, c[3], R8;
END