fp64 · December 18, 2025 04:17
diff --git a/gen.v1.0.cpp b/gen.v1.0.cpp
 // Public Domain under http://unlicense.org, see link for details.

 // Generator of test cases for PSP vdot instruction.
 // Version 1.0.

 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>

 // NOTE: we use bitpatterns (as u32), not floats, throughout.
 // LSB of u32 is LSB of float's significand.
 typedef uint32_t u32;

 //==============================================================================
 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 // ! The interface to the dot product function. !
 // ! Replace this with an actual implementation !
 // ! invoking PSP vdot instruction.             !
 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 static u32 vdot(const u32 *x,const u32 *y)
 {
    // Stub.
    (void)x;
    (void)y;
    return 0;
 }

 /*
 // Example 1. Accessing x86 dpps dot product instruction, GCC inline asm.
 static u32 vdot(const u32 *x,const u32 *y)
 {
    // NOTE: this is over-cautious.
    // You may not need __volatile__,
    // unless you plan to compute the
    // dot product twice and expect
    // different results (e.g. you
    // change rounding mode in between).
    // Also "memory" is likely only
    // needed if you don't specify
    // the size of inputs via casts.
    // See https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Clobbers-and-Scratch-Registers
    // for details.
    u32 z;
    __asm__ __volatile__(
        "movups %[x],%%xmm0\n\t"
        "movups %[y],%%xmm1\n\t"
        "dpps $241,%%xmm1,%%xmm0\n\t"
        "movd %%xmm0,%[z]\n\t"
        :[z]"=r"(z)
        :[x]"m"(*(const u32(*)[4])x)
        ,[y]"m"(*(const u32(*)[4])y)
        :"xmm0","xmm1","memory"
    );
    return z;
 }
 */

 /*
 // Example 2. Accessing x86 dpps dot product instruction, intrinsics.
 static u32 vdot(const u32 *x,const u32 *y)
 {
    float z=_mm_cvtss_f32(_mm_dp_ps(
        _mm_loadu_ps((const float *)x),
        _mm_loadu_ps((const float *)y),241));
    u32 ret;
    memcpy(&ret,&z,sizeof(ret));
    return ret;
 }
 */

 /*
 // Example 3. Scalar dot product.
 static u32 vdot(const u32 *x,const u32 *y)
 {
    float X[4],Y[4],ret;
    u32 bits;
    memcpy(X,x,sizeof(X));
    memcpy(Y,y,sizeof(Y));
    ret=(X[0]*Y[0]+X[1]*Y[1])+(X[2]*Y[2]+X[3]*Y[3]); // Matches summation order of DPPS.
    memcpy(&bits,&ret,sizeof(bits));
    return bits;
 }
 */

 //==============================================================================
 // Emitting test cases.

 static const char *filename="vdot.bin";
 static FILE *file=0;
 static bool include_input=true,include_output=true;

 static void emit(const u32 *x,const u32 *y)
 {
    u32 z=vdot(x,y);
    if(include_input)  fwrite(x,1,4*sizeof(u32),file);
    if(include_input)  fwrite(y,1,4*sizeof(u32),file);
    if(include_output) fwrite(&z,1,sizeof(u32),file);
 }

 // Emit all 24 permutations of the input order.
 // The order matches for x and y, there are no
 // x<->y swaps.
 // The computed result of the dot product may
 // or may not depend on it. E.g. x86 dpps
 // instruction is specified to be evaluated as
 // (x[0]*y[0]+x[1]*y[1])+(x[2]*y[2]+x[3]*y[3])
 static void emit_all_permutations(const u32 *x,const u32 *y)
 {
    int P[24]={228,180,216,120,156,108,225,177,201, 57,141, 45,210,114,198, 54, 78, 30,147, 99,135, 39, 75, 27};
    for(int i=0;i<24;++i)
    {
        u32 X[4],Y[4];
        for(int j=0;j<4;++j) X[j]=x[(P[i]>>(2*j))&3];
        for(int j=0;j<4;++j) Y[j]=y[(P[i]>>(2*j))&3];
        emit(X,Y);
    }
 }

 //==============================================================================
 // Embedding a message (comment) in the output file.

 static void message(const char *text)
 {
    char buf[7*sizeof(u32)]={0};
    u32 marker=0xFFFFFFFFu;
    snprintf(buf,sizeof(buf),text);
    if(include_input)  fwrite(&marker,1,sizeof(marker),file);
    if(include_input)  fwrite(buf,1,sizeof(buf),file);
    if(include_output) fwrite(&marker,1,sizeof(marker),file);
 }

 //==============================================================================
 // Floating-point values.

 static const u32 S=u32(1)<<31,E=u32(255)<<23,M=(u32(1)<<23)-1;
 static const u32 D=u32(1)<<23,ONE=127*D;

 //==============================================================================
 // Random number generation.

 // https://gist.github.com/fp64/126289051f0c9e1b70bcba7c3e07a82f
 static inline uint32_t u32noise2(uint64_t n)
 {
    n^=~n>>32; n*=0xAF251AF3B0F025B5ull;
    n^=~n>>32; n=(n|1ull)^(n*n);
    return (uint32_t)(n^(n>>32));
 }

 static uint64_t rng_seed=0;

 static inline u32 rnd(void)
 {
    return u32noise2(rng_seed++);
 }

 static inline u32 rnd(u32 n)
 {
    return rnd()%n;
 }

 static inline u32 rnd(u32 l,u32 h)
 {
    return l+rnd(h-l);
 }

 static inline u32 rnd12(void)
 {
    return ONE+(rnd()&M);
 }

 //==============================================================================
 // Main program.

 int main(int argc,char **argv)
 {
    for(int i=1;i<argc;++i)
    {
        if(!strcmp(argv[i],"--no-input"))  include_input =false;
        if(!strcmp(argv[i],"--no-output")) include_output=false;
        if(!strncmp(argv[i],"--dump=",7)) filename=argv[i]+7;
    }
    if(!(file=fopen(filename,"wb"))) return 1;
    //==============================================================================
    message("PSP vdot testcase generator.");
    message("Version 1.0.");
    if(true)
    {
        message("[1;2)");
        for(int k=0;k<1024;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) t[i]=rnd12();
            emit(t+0,t+4);
        }
    }
    if(true)
    {
        message("[1;2), perm");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) t[i]=rnd12();
            emit_all_permutations(t+0,t+4);
        }
    }
    if(true)
    {
        message("+- [1;2)");
        for(int k=0;k<1024;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) {t[i]=rnd12(); t[i]|=rnd()&S;}
            emit(t+0,t+4);
        }
    }
    if(true)
    {
        message("+- [1;2), perm");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) {t[i]=rnd12(); t[i]|=rnd()&S;}
            emit_all_permutations(t+0,t+4);
        }
    }
    if(true)
    {
        message("+- [1/256;256)");
        for(int k=0;k<4096;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) {t[i]=rnd12(); t[i]|=rnd()&S; t[i]+=D*rnd(-u32(8),+u32(8));}
            emit(t+0,t+4);
        }
    }
    if(true)
    {
        message("+- [1/256;256), perm");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) {t[i]=rnd12(); t[i]|=rnd()&S; t[i]+=D*rnd(-u32(8),+u32(8));}
            emit_all_permutations(t+0,t+4);
        }
    }
    if(true)
    {
        message("0*0");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) t[i]=((k>>i)&1?S:0);
            emit(t+0,t+4);
        }
    }
    if(true)
    {
        message("1*0");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) t[i]=((k>>i)&1?S:0)|(i<4?ONE:0);
            emit(t+0,t+4);
        }
    }
    if(true)
    {
        message("Random");
        for(int k=0;k<4096;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) t[i]=rnd();
            emit(t+0,t+4);
        }
    }
    if(true)
    {
        message("Random, perm");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<8;++i) t[i]=rnd();
            emit_all_permutations(t+0,t+4);
        }
    }
    if(true)
    {
        message("1+x");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={ONE,ONE,0,0,ONE,0,0,0};
            t[5]=rnd12()+2*D;
            for(int p=2;p>-32;--p)
            {
                emit(t+0,t+4);
                t[5]-=D;
            }
        }
    }
    if(true)
    {
        message("1-1+x");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={ONE,ONE,ONE,0,ONE,S|ONE,0,0};
            t[6]=rnd12()+2*D;
            for(int p=2;p>-32;--p)
            {
                emit(t+0,t+4);
                t[6]-=D;
            }
        }
    }
    if(true)
    {
        message("1+x-1+y");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={ONE,ONE,ONE,ONE,ONE,0,S|ONE,0};
            t[5]=rnd12()+2*D;
            t[7]=rnd12()+2*D;
            t[5]|=(S&rnd());
            t[7]|=(S&rnd());
            for(int p=2;p>-32;--p)
            {
                emit(t+0,t+4);
                t[5]-=D;
                t[7]-=D;
            }
        }
    }
    if(true)
    {
        message("1-(1-x)*(1+x)");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={ONE,0,0,0,ONE,0,0,0};
            for(int p=22;p>=1;--p)
            {
                u32 x=rnd(u32(1)<<p);
                t[1]=ONE+x;
                t[5]=S|(ONE-2*x);
                emit(t+0,t+4);
            }
        }
    }
    if(true)
    {
        message("1-(1+x)*(1+y)");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={ONE,0,0,0,ONE,0,0,0};
            for(int p=22;p>=1;--p)
            {
                t[1]=   ONE+rnd(-(u32(1)<<p),+(u32(1)<<p));
                t[5]=S|(ONE+rnd(-(u32(1)<<p),+(u32(1)<<p)));
                emit(t+0,t+4);
            }
        }
    }
    if(true)
    {
        message("A*B-C*D");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            for(int p=20;p>=1;--p)
            {
                t[0]=   ONE+rnd(-(u32(1)<<p),+(u32(1)<<p));
                t[1]=   ONE+rnd(-(u32(1)<<p),+(u32(1)<<p));
                t[4]=   ONE+rnd(-(u32(1)<<p),+(u32(1)<<p));
                t[5]=S|(ONE+rnd(-(u32(1)<<p),+(u32(1)<<p)));
                emit(t+0,t+4);
            }
        }
    }
    if(true)
    {
        message("2^n*(A*B-C*D)");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            u32 c=4096;
            u32 xA=rnd(c);
            u32 xB=rnd(c);
            u32 xC=rnd(c);
            u32 xD=rnd(c);
            for(u32 p=0;p<256;++p)
            {
                u32 e=p*D;
                t[0]=   e+xA;
                t[1]=   e+xB;
                t[4]=   e+xC;
                t[5]=S|(e+xD);
                emit(t+0,t+4);
            }
        }
    }
    if(true)
    {
        message("A*B-C*D, low 0s");
        for(int k=0;k<256;++k)
        {
            u32 t[8]={0};
            t[0]=   rnd12();
            t[1]=   rnd12();
            t[4]=   rnd12();
            t[5]=S|(rnd12());
            for(int p=1;p<22;++p)
            {
                t[0]&=(u32(-1)<<p);
                t[1]&=(u32(-1)<<p);
                t[4]&=(u32(-1)<<p);
                t[5]&=(u32(-1)<<p);
                emit(t+0,t+4);
            }
        }
    }
    if(true)
    {
        message("1+k*2^n");
        for(u32 k=0,p=8;k<(u32(1)<<p);++k)
        {
            u32 t[8]={ONE,ONE,0,0,ONE,0,0,0};
            t[5]=ONE+(k<<(23-p));
            for(int n=0;n>-32;--n)
            {
                emit(t+0,t+4);
                t[5]-=D;
            }
        }
    }
    if(true)
    {
        message("0/1/inf/nan");
        u32 s[4]={0,ONE,E,E+1};
        for(u32 k=0;k<256;++k)
        {
            u32 t[8]={0};
            for(int i=0;i<4;++i) t[(i&1)+4*(i>>1)]=s[(k>>(2*i))&3];
            emit(t+0,t+4);
        }
    }
    //==============================================================================
    fclose(file);
    return 0;
 }
	// Public Domain under http://unlicense.org, see link for details.

	// Generator of test cases for PSP vdot instruction.
	// Version 1.0.

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	// NOTE: we use bitpatterns (as u32), not floats, throughout.
	// LSB of u32 is LSB of float's significand.
	typedef uint32_t u32;

	//==============================================================================
	// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
	// ! The interface to the dot product function. !
	// ! Replace this with an actual implementation !
	// ! invoking PSP vdot instruction. !
	// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	static u32 vdot(const u32 x,const u32 y)
	{
	// Stub.
	(void)x;
	(void)y;
	return 0;
	}

	/*
	// Example 1. Accessing x86 dpps dot product instruction, GCC inline asm.
	static u32 vdot(const u32 x,const u32 y)
	{
	// NOTE: this is over-cautious.
	// You may not need __volatile__,
	// unless you plan to compute the
	// dot product twice and expect
	// different results (e.g. you
	// change rounding mode in between).
	// Also "memory" is likely only
	// needed if you don't specify
	// the size of inputs via casts.
	// See https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Clobbers-and-Scratch-Registers
	// for details.
	u32 z;
	__asm__ __volatile__(
	"movups %[x],%%xmm0\n\t"
	"movups %[y],%%xmm1\n\t"
	"dpps $241,%%xmm1,%%xmm0\n\t"
	"movd %%xmm0,%[z]\n\t"
	:[z]"=r"(z)
	:[x]"m"((const u32()[4])x)
	,[y]"m"((const u32()[4])y)
	:"xmm0","xmm1","memory"
	);
	return z;
	}
	*/

	/*
	// Example 2. Accessing x86 dpps dot product instruction, intrinsics.
	static u32 vdot(const u32 x,const u32 y)
	{
	float z=_mm_cvtss_f32(_mm_dp_ps(
	_mm_loadu_ps((const float *)x),
	_mm_loadu_ps((const float *)y),241));
	u32 ret;
	memcpy(&ret,&z,sizeof(ret));
	return ret;
	}
	*/

	/*
	// Example 3. Scalar dot product.
	static u32 vdot(const u32 x,const u32 y)
	{
	float X[4],Y[4],ret;
	u32 bits;
	memcpy(X,x,sizeof(X));
	memcpy(Y,y,sizeof(Y));
	ret=(X[0]Y[0]+X[1]Y[1])+(X[2]Y[2]+X[3]Y[3]); // Matches summation order of DPPS.
	memcpy(&bits,&ret,sizeof(bits));
	return bits;
	}
	*/

	//==============================================================================
	// Emitting test cases.

	static const char *filename="vdot.bin";
	static FILE *file=0;
	static bool include_input=true,include_output=true;

	static void emit(const u32 x,const u32 y)
	{
	u32 z=vdot(x,y);
	if(include_input) fwrite(x,1,4*sizeof(u32),file);
	if(include_input) fwrite(y,1,4*sizeof(u32),file);
	if(include_output) fwrite(&z,1,sizeof(u32),file);
	}

	// Emit all 24 permutations of the input order.
	// The order matches for x and y, there are no
	// x<->y swaps.
	// The computed result of the dot product may
	// or may not depend on it. E.g. x86 dpps
	// instruction is specified to be evaluated as
	// (x[0]y[0]+x[1]y[1])+(x[2]y[2]+x[3]y[3])
	static void emit_all_permutations(const u32 x,const u32 y)
	{
	int P[24]={228,180,216,120,156,108,225,177,201, 57,141, 45,210,114,198, 54, 78, 30,147, 99,135, 39, 75, 27};
	for(int i=0;i<24;++i)
	{
	u32 X[4],Y[4];
	for(int j=0;j<4;++j) X[j]=x[(P[i]>>(2*j))&3];
	for(int j=0;j<4;++j) Y[j]=y[(P[i]>>(2*j))&3];
	emit(X,Y);
	}
	}

	//==============================================================================
	// Embedding a message (comment) in the output file.

	static void message(const char *text)
	{
	char buf[7*sizeof(u32)]={0};
	u32 marker=0xFFFFFFFFu;
	snprintf(buf,sizeof(buf),text);
	if(include_input) fwrite(&marker,1,sizeof(marker),file);
	if(include_input) fwrite(buf,1,sizeof(buf),file);
	if(include_output) fwrite(&marker,1,sizeof(marker),file);
	}

	//==============================================================================
	// Floating-point values.

	static const u32 S=u32(1)<<31,E=u32(255)<<23,M=(u32(1)<<23)-1;
	static const u32 D=u32(1)<<23,ONE=127*D;

	//==============================================================================
	// Random number generation.

	// https://gist.github.com/fp64/126289051f0c9e1b70bcba7c3e07a82f
	static inline uint32_t u32noise2(uint64_t n)
	{
	n^=~n>>32; n*=0xAF251AF3B0F025B5ull;
	n^=~n>>32; n=(n\|1ull)^(n*n);
	return (uint32_t)(n^(n>>32));
	}

	static uint64_t rng_seed=0;

	static inline u32 rnd(void)
	{
	return u32noise2(rng_seed++);
	}

	static inline u32 rnd(u32 n)
	{
	return rnd()%n;
	}

	static inline u32 rnd(u32 l,u32 h)
	{
	return l+rnd(h-l);
	}

	static inline u32 rnd12(void)
	{
	return ONE+(rnd()&M);
	}

	//==============================================================================
	// Main program.

	int main(int argc,char **argv)
	{
	for(int i=1;i<argc;++i)
	{
	if(!strcmp(argv[i],"--no-input")) include_input =false;
	if(!strcmp(argv[i],"--no-output")) include_output=false;
	if(!strncmp(argv[i],"--dump=",7)) filename=argv[i]+7;
	}
	if(!(file=fopen(filename,"wb"))) return 1;
	//==============================================================================
	message("PSP vdot testcase generator.");
	message("Version 1.0.");
	if(true)
	{
	message("[1;2)");
	for(int k=0;k<1024;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) t[i]=rnd12();
	emit(t+0,t+4);
	}
	}
	if(true)
	{
	message("[1;2), perm");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) t[i]=rnd12();
	emit_all_permutations(t+0,t+4);
	}
	}
	if(true)
	{
	message("+- [1;2)");
	for(int k=0;k<1024;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) {t[i]=rnd12(); t[i]\|=rnd()&S;}
	emit(t+0,t+4);
	}
	}
	if(true)
	{
	message("+- [1;2), perm");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) {t[i]=rnd12(); t[i]\|=rnd()&S;}
	emit_all_permutations(t+0,t+4);
	}
	}
	if(true)
	{
	message("+- [1/256;256)");
	for(int k=0;k<4096;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) {t[i]=rnd12(); t[i]\|=rnd()&S; t[i]+=D*rnd(-u32(8),+u32(8));}
	emit(t+0,t+4);
	}
	}
	if(true)
	{
	message("+- [1/256;256), perm");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) {t[i]=rnd12(); t[i]\|=rnd()&S; t[i]+=D*rnd(-u32(8),+u32(8));}
	emit_all_permutations(t+0,t+4);
	}
	}
	if(true)
	{
	message("0*0");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) t[i]=((k>>i)&1?S:0);
	emit(t+0,t+4);
	}
	}
	if(true)
	{
	message("1*0");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) t[i]=((k>>i)&1?S:0)\|(i<4?ONE:0);
	emit(t+0,t+4);
	}
	}
	if(true)
	{
	message("Random");
	for(int k=0;k<4096;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) t[i]=rnd();
	emit(t+0,t+4);
	}
	}
	if(true)
	{
	message("Random, perm");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<8;++i) t[i]=rnd();
	emit_all_permutations(t+0,t+4);
	}
	}
	if(true)
	{
	message("1+x");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={ONE,ONE,0,0,ONE,0,0,0};
	t[5]=rnd12()+2*D;
	for(int p=2;p>-32;--p)
	{
	emit(t+0,t+4);
	t[5]-=D;
	}
	}
	}
	if(true)
	{
	message("1-1+x");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={ONE,ONE,ONE,0,ONE,S\|ONE,0,0};
	t[6]=rnd12()+2*D;
	for(int p=2;p>-32;--p)
	{
	emit(t+0,t+4);
	t[6]-=D;
	}
	}
	}
	if(true)
	{
	message("1+x-1+y");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={ONE,ONE,ONE,ONE,ONE,0,S\|ONE,0};
	t[5]=rnd12()+2*D;
	t[7]=rnd12()+2*D;
	t[5]\|=(S&rnd());
	t[7]\|=(S&rnd());
	for(int p=2;p>-32;--p)
	{
	emit(t+0,t+4);
	t[5]-=D;
	t[7]-=D;
	}
	}
	}
	if(true)
	{
	message("1-(1-x)*(1+x)");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={ONE,0,0,0,ONE,0,0,0};
	for(int p=22;p>=1;--p)
	{
	u32 x=rnd(u32(1)<<p);
	t[1]=ONE+x;
	t[5]=S\|(ONE-2*x);
	emit(t+0,t+4);
	}
	}
	}
	if(true)
	{
	message("1-(1+x)*(1+y)");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={ONE,0,0,0,ONE,0,0,0};
	for(int p=22;p>=1;--p)
	{
	t[1]= ONE+rnd(-(u32(1)<<p),+(u32(1)<<p));
	t[5]=S\|(ONE+rnd(-(u32(1)<<p),+(u32(1)<<p)));
	emit(t+0,t+4);
	}
	}
	}
	if(true)
	{
	message("AB-CD");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	for(int p=20;p>=1;--p)
	{
	t[0]= ONE+rnd(-(u32(1)<<p),+(u32(1)<<p));
	t[1]= ONE+rnd(-(u32(1)<<p),+(u32(1)<<p));
	t[4]= ONE+rnd(-(u32(1)<<p),+(u32(1)<<p));
	t[5]=S\|(ONE+rnd(-(u32(1)<<p),+(u32(1)<<p)));
	emit(t+0,t+4);
	}
	}
	}
	if(true)
	{
	message("2^n(AB-C*D)");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	u32 c=4096;
	u32 xA=rnd(c);
	u32 xB=rnd(c);
	u32 xC=rnd(c);
	u32 xD=rnd(c);
	for(u32 p=0;p<256;++p)
	{
	u32 e=p*D;
	t[0]= e+xA;
	t[1]= e+xB;
	t[4]= e+xC;
	t[5]=S\|(e+xD);
	emit(t+0,t+4);
	}
	}
	}
	if(true)
	{
	message("AB-CD, low 0s");
	for(int k=0;k<256;++k)
	{
	u32 t[8]={0};
	t[0]= rnd12();
	t[1]= rnd12();
	t[4]= rnd12();
	t[5]=S\|(rnd12());
	for(int p=1;p<22;++p)
	{
	t[0]&=(u32(-1)<<p);
	t[1]&=(u32(-1)<<p);
	t[4]&=(u32(-1)<<p);
	t[5]&=(u32(-1)<<p);
	emit(t+0,t+4);
	}
	}
	}
	if(true)
	{
	message("1+k*2^n");
	for(u32 k=0,p=8;k<(u32(1)<<p);++k)
	{
	u32 t[8]={ONE,ONE,0,0,ONE,0,0,0};
	t[5]=ONE+(k<<(23-p));
	for(int n=0;n>-32;--n)
	{
	emit(t+0,t+4);
	t[5]-=D;
	}
	}
	}
	if(true)
	{
	message("0/1/inf/nan");
	u32 s[4]={0,ONE,E,E+1};
	for(u32 k=0;k<256;++k)
	{
	u32 t[8]={0};
	for(int i=0;i<4;++i) t[(i&1)+4(i>>1)]=s[(k>>(2i))&3];
	emit(t+0,t+4);
	}
	}
	//==============================================================================
	fclose(file);
	return 0;
	}
No results found