Skip to content

Instantly share code, notes, and snippets.

@johnwbyrd
Created February 17, 2026 04:25
Show Gist options
  • Select an option

  • Save johnwbyrd/cb75a1844661677fe614bc39f171fe39 to your computer and use it in GitHub Desktop.

Select an option

Save johnwbyrd/cb75a1844661677fe614bc39f171fe39 to your computer and use it in GitHub Desktop.
Demonstrates 7 wrong results in Berkeley SoftFloat 3e extFloat80 operations for valid inputs
/*
* demo_extf80_bugs.c -- Demonstrate incorrect SoftFloat 3e results
*
* Every input below is a valid extFloat80 encoding with an unambiguous
* mathematical value. The extFloat80 format stores the integer bit (J)
* explicitly, so every combination of exponent and significand is defined.
*
* Build (from this directory):
* gcc -O2 -DSOFTFLOAT_FAST_INT64 -DLITTLEENDIAN \
* -I ../../../berkeley-softfloat-3/source/include \
* -I ../../../berkeley-softfloat-3/build/Linux-x86_64-GCC \
* demo_extf80_bugs.c \
* ../../../berkeley-softfloat-3/build/Linux-x86_64-GCC/softfloat.a \
* -o demo_extf80_bugs
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "softfloat.h"
/* Pack an extFloat80 from sign+exponent and 64-bit significand. */
static extFloat80_t pack80(uint16_t signExp, uint64_t sig)
{
union { struct extFloat80M s; extFloat80_t f; } u;
u.s.signExp = signExp;
u.s.signif = sig;
return u.f;
}
/* Unpack for display. */
static void unpack80(extFloat80_t v, uint16_t *signExp, uint64_t *sig)
{
union { struct extFloat80M s; extFloat80_t f; } u;
u.f = v;
*signExp = u.s.signExp;
*sig = u.s.signif;
}
static void show(const char *label, extFloat80_t v)
{
uint16_t se; uint64_t sig;
unpack80(v, &se, &sig);
printf(" %-12s = %04X.%016llX\n", label, se, (unsigned long long)sig);
}
static void heading(const char *desc)
{
printf("\n--- %s ---\n", desc);
}
int main(void)
{
extFloat80_t a, b, r;
softfloat_roundingMode = softfloat_round_near_even;
extF80_roundingPrecision = 80;
/*
* Bug 1: Addition conjures value from nothing.
*
* {exp=0x3FFF, sig=0} is an unnormal-zero.
* The significand is 0, so the value is 0 regardless of the exponent.
* Adding it to itself should give 0.
*/
heading("Bug 1: 0 + 0 should be 0");
a = pack80(0x3FFF, UINT64_C(0x0000000000000000));
printf(" Input: {exp=0x3FFF, sig=0} -- value = 0 (sig is zero)\n");
r = extF80_add(a, a);
show("0 + 0", r);
printf(" Expected: 0000.0000000000000000 (= 0)\n");
/*
* Bug 2: 0.5 + 0.5 = 3.0
*
* {exp=0x3FFF, sig=0x4000000000000000} has J=0.
* Value = 2^(0x3FFF-16383) * 0x4000000000000000 / 2^63 = 1.0 * 0.5 = 0.5
*/
heading("Bug 2: 0.5 + 0.5 should be 1.0");
a = pack80(0x3FFF, UINT64_C(0x4000000000000000));
printf(" Input: {exp=0x3FFF, sig=0x4000..0} -- value = 0.5\n");
r = extF80_add(a, a);
show("0.5 + 0.5", r);
printf(" Expected: 3FFF.8000000000000000 (= 1.0)\n");
/*
* Bug 3: Finite value promoted to infinity.
*
* {exp=0x7FFE, sig=0x7FFFFFFFFFFFFFFF} has J=0.
* This is a large but finite value (just below the max normal).
* Adding 0 should return it unchanged.
*/
heading("Bug 3: large_finite + 0 = +Inf");
a = pack80(0x7FFE, UINT64_C(0x7FFFFFFFFFFFFFFF));
b = pack80(0x0000, UINT64_C(0x0000000000000000));
printf(" Input: {exp=0x7FFE, sig=0x7FFF..F} -- finite, near max\n");
r = extF80_add(a, b);
show("big + 0", r);
printf(" Expected: 7FFD.FFFFFFFFFFFFFFFE (finite)\n");
/*
* Bug 4: Pseudo-denormal addition gives zero.
*
* {exp=0, sig=0x8000000000000000} is a pseudo-denormal with J=1.
* Value = 2^(1-16383) * 1.0 = 2^(-16382).
* Adding it to itself should give 2^(-16381).
*/
heading("Bug 4: 2^(-16382) + 2^(-16382) should be 2^(-16381)");
a = pack80(0x0000, UINT64_C(0x8000000000000000));
printf(" Input: {exp=0, sig=0x8000..0} -- pseudo-denormal = 2^(-16382)\n");
r = extF80_add(a, a);
show("a + a", r);
printf(" Expected: 0002.8000000000000000 (= 2^(-16381))\n");
/*
* Bug 5: Subtraction of nearly-equal values gives huge wrong answer.
*
* A = {exp=1, sig=0x8000000000000000} = 2^(-16382)
* B = {exp=0, sig=0x8000000000000001} = 2^(-16382) + 2^(-16445)
* A - B should be approximately -2^(-16445) (tiny negative).
*/
heading("Bug 5: A - B where A ~ B gives wrong sign and magnitude");
a = pack80(0x0001, UINT64_C(0x8000000000000000));
b = pack80(0x0000, UINT64_C(0x8000000000000001));
printf(" A = {exp=1, sig=0x8000..0} = 2^(-16382)\n");
printf(" B = {exp=0, sig=0x8000..1} = 2^(-16382) + 2^(-16445)\n");
r = extF80_sub(a, b);
show("A - B", r);
printf(" Expected: 8000.0000000000000001 (tiny negative)\n");
/*
* Bug 6: Infinity times zero = Infinity (should be NaN).
*
* A = canonical +Inf = {exp=0x7FFF, sig=0x8000000000000000}
* B = unnormal-zero = {exp=0x0001, sig=0x0000000000000000}
* B has sig=0, so its value is 0. Inf * 0 is undefined (NaN).
*/
heading("Bug 6: Inf * 0 should be NaN (invalid)");
a = pack80(0x7FFF, UINT64_C(0x8000000000000000));
b = pack80(0x0001, UINT64_C(0x0000000000000000));
printf(" A = +Inf (canonical)\n");
printf(" B = {exp=1, sig=0} -- value = 0 (sig is zero)\n");
softfloat_exceptionFlags = 0;
r = extF80_mul(a, b);
show("Inf * 0", r);
printf(" Flags: %s%s\n",
(softfloat_exceptionFlags & softfloat_flag_invalid) ? "invalid " : "",
(softfloat_exceptionFlags == 0) ? "(none)" : "");
printf(" Expected: NaN with invalid flag\n");
/*
* Bug 7: rem(1.0, Inf) gives NaN (should be 1.0).
*
* B = pseudo-infinity = {exp=0x7FFF, sig=0x0000000000000000}
* This is infinity (exp=max, fraction=0).
* rem(finite, Inf) = the finite operand, unchanged.
*/
heading("Bug 7: rem(1.0, Inf) should be 1.0");
a = pack80(0x3FFF, UINT64_C(0x8000000000000000));
b = pack80(0x7FFF, UINT64_C(0x0000000000000000));
printf(" A = 1.0 (canonical)\n");
printf(" B = +Inf (pseudo-infinity: exp=0x7FFF, sig=0)\n");
softfloat_exceptionFlags = 0;
r = extF80_rem(a, b);
show("rem(1, Inf)", r);
printf(" Flags: %s%s\n",
(softfloat_exceptionFlags & softfloat_flag_invalid) ? "invalid " : "",
(softfloat_exceptionFlags == 0) ? "(none)" : "");
printf(" Expected: 3FFF.8000000000000000 (= 1.0, no flags)\n");
printf("\n");
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment