digitalmars.D - The Joy of Signalling NaNs! (A compiler patch)

Don (185/185) Mar 18 2009 It's great that D initializes floating-point variables to NaN, instead

dennis luehring (4/4) Mar 19 2009 i talked about the signaling NaN exception "feature" to coworkers

Don (10/15) Mar 19 2009 Yup, I've had many NaN nightmares. Whenever you have an uninitialized

Mason Green (6/11) Mar 19 2009 I second adding this feature to D. I've spent many, many hours chasing

Nick Sabalausky (4/15) Mar 19 2009 Warnings or errors on the use of uninitialized variables would also solv...

Walter Bright (1/1) Mar 19 2009 Pls add to bugzilla as an enhancement request!

Don (2/3) Mar 19 2009 2746.

bearophile (4/6) Mar 19 2009 It may be OK in the specs too.
Andrei Alexandrescu (5/12) Mar 19 2009 That's terrific, Don. I've been bitten by NaNs in calculations many
Georg Wrede (8/15) Mar 19 2009 This being the least significant bit should show to everybody that the

Walter Bright (6/7) Mar 19 2009 I originally had this in the Digital Mars C compiler years and years

Don (9/18) Mar 19 2009 I noticed there was a place in the backend where it was careful to

Walter Bright (4/24) Mar 19 2009 You can still know it's not the case for C/C++ given the crappy support

Fawzi Mohamed (3/218) Mar 20 2009 Great work Don!

Don <nospam nospam.com> writes:

It's great that D initializes floating-point variables to NaN, instead 
of whatever random garbage happened to be in RAM.
But, if your calculation ends up with a NaN, you have to work out where 
it came from. Worse, the NaN might not necessarily
be visible in your final results, but your results may nonetheless be wrong.

The hardware has excellent support for debugging these problems -- all 
you need to do is activate the floating-point 'invalid' trap,
and you'll get a hardware exception whenever you _create_ a NaN. What 
about uninitialized variables?
Signalling NaNs are designed for exactly this situation. The instant you 
access a signalling NaN, a hardware exception occurs,
and you drop straight into your debugger (just like accessing a null 
pointer).
But this could only work if the compiler initialized every uninitialised 
floating-point variable to a signalling NaN.

Now that we have access to the backend (thanks Walter!), we can do 
exactly that. My patch (below) is enabled only when compiled with DMC.
real.nan is unchanged, and won't cause exceptions if you use it, but 
real.init is now a signalling nan.
This doesn't make any difference to anything, until you enable FP 
exceptions.
And when you do, if no exceptions occur, you can use the code coverage 
feature to give very high confidence that you're not using any 
uninitialised floating-point variables.

I propose that this should become part of DMD. It doesn't need to be in 
the spec, it's primarily for debugging.

Don.

==============
Example of usage:
==============

// Usage example: with the patch applied, an uninitialized double holds a
// signalling NaN. Using it is silent while the 'invalid' floating-point
// exception is masked, and traps once enableExceptions() has unmasked it.
void main()
{
     double a, b, c;   // none of these is explicitly initialized
     a*=7;        // Exactly the same as it is now, a is nan.

     enableExceptions();   // unmask the 'invalid' exception from here on

     c = 6;      // ok, c is initialized now
     c *= 10;
     b *= 10;    // BANG ! Straight into the debugger
     b *= 5;

     disableExceptions();  // mask the 'invalid' exception again
}

-------

// Unmasks the x87 'invalid operation' exception by clearing bit 0 of the
// FPU control word. Only the x87 control word is touched (fstcw/fldcw).
// Compiles to an empty function when D_InlineAsm_X86 is not defined.
void enableExceptions() {
     version(D_InlineAsm_X86) {
          short cont;   // memory scratch for the 16-bit x87 control word
          asm {
              fclex;          // clear pending x87 exception flags before changing the masks
              fstcw cont;     // store the current control word into cont
              mov AX, cont;
              and AX, 0xFFFE; // clear bit 0 (the 'invalid' mask bit): enable invalid exception
              mov cont, AX;
              fldcw cont;     // load the modified control word back into the FPU
          }
      }
  }

// Masks the x87 'invalid operation' exception again by setting bit 0 of the
// FPU control word; the counterpart of enableExceptions().
// Compiles to an empty function when D_InlineAsm_X86 is not defined.
void disableExceptions() {
     version(D_InlineAsm_X86) {
          short cont;   // memory scratch for the 16-bit x87 control word
          asm {
              fclex;          // clear pending x87 exception flags before changing the masks
              fstcw cont;     // store the current control word into cont
              mov AX, cont;
              or AX, 0x1; // set bit 0 (the 'invalid' mask bit): disable invalid exception
              mov cont, AX;
              fldcw cont;     // load the modified control word back into the FPU
          }
      }
  }


=================================
Patches to DMD to turn all uninitialized floats into SNANs.

Changes are in mytype.c and e2ir.c
=================================
mytype.c:
=================================
line 2150:

Expression *TypeBasic::defaultInit(Loc loc)
{   integer_t value = 0;
#if __DMC__
	// Note: could be up to 16 bytes long.
	unsigned short snan[8] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xBFFF, 0x7FFF, 0, 
0, 0 };
	d_float80 fvalue = *(long double*)snan;
#endif

line 2177:

	case Tfloat80:
#if __DMC__
		return new RealExp(loc, fvalue, this);
#else
	    return getProperty(loc, Id::nan);
#endif

line 2186:

	case Tcomplex80:
#if __DMC__
		{   // Can't use fvalue + I*fvalue (the im part becomes a quiet NaN).
			complex_t cvalue;
			((real_t *)&cvalue)[0] = fvalue;
			((real_t *)&cvalue)[1] = fvalue;
			return new ComplexExp(loc, cvalue, this);
		}
#else
	    return getProperty(loc, Id::nan);
#endif
=================================
e2ir.c line 1182.
=================================

/*
 * Returns true if x is a signalling NaN, false for every other value.
 *
 * Only implemented when the compiler is built with DMC; with any other host
 * compiler the function always returns false.
 * NOTE(review): the bit test assumes real_t uses the x87 80-bit extended
 * layout (64-bit significand in 16-bit words 0..3) -- confirm for the host.
 */
bool isSignallingNaN(real_t x)
{
#if __DMC__
	// An ordered value satisfies at least one of these comparisons;
	// only a NaN fails both.
	if (x>=0 || x<0) return false;

	// The quiet bit is bit 14 of the fourth 16-bit word (significand
	// bit 62). Copy that word out with memcpy instead of reading x through
	// an unsigned short pointer, which would violate the aliasing rules.
	unsigned short topWord;
	memcpy(&topWord, (const char *)&x + 3 * sizeof(unsigned short), sizeof(topWord));
	return (topWord & 0x4000) == 0;
#else
	return false;
#endif
}


/*
 * Converts this floating-point literal into a backend constant.
 *
 * The value is stored into the member of a zero-filled 'union eve' selected
 * by the basic type of the expression, and the result of el_const() for that
 * type and union is returned. The irs parameter is not used here.
 *
 * Narrowing 'value' to float or double converts a signalling NaN into a quiet
 * one, so when isSignallingNaN(value) is true the quiet bit is cleared again
 * directly in the stored bytes. If that would leave an all-zero fraction (the
 * NaN payload was entirely in the truncated low bits) the result would encode
 * infinity, so the lowest fraction bit is set to keep it a NaN.
 */
elem *RealExp::toElem(IRState *irs)
{   union eve c;
     tym_t ty;

     //printf("RealExp::toElem(%p)\n", this);
     memset(&c, 0, sizeof(c));
     ty = type->toBasetype()->totym();
     switch (tybasic(ty))
     {
	case TYfloat:
	case TYifloat:
	    c.Vfloat = value;	// this converts SNAN to QNAN
	    if (isSignallingNaN(value))
	    {
		// Patch the stored bytes in place via memcpy; avoids both the
		// aliasing cast and a second floating-point copy of the value.
		unsigned int bits;
		memcpy(&bits, &c.Vfloat, sizeof(bits));
		bits &= 0xFFBFFFFFu;		// clear the quiet bit (bit 22)
		if ((bits & 0x007FFFFFu) == 0)	// empty fraction would mean infinity
		    bits |= 1;
		memcpy(&c.Vfloat, &bits, sizeof(bits));
	    }
	    break;

	case TYdouble:
	case TYidouble:
	    c.Vdouble = value;	// this unfortunately converts SNAN to QNAN.
	    if (isSignallingNaN(value))
	    {
		// words[1] is the high half, as in the original code
		// (little-endian layout).
		unsigned int words[2];
		memcpy(words, &c.Vdouble, sizeof(words));
		words[1] &= 0xFFF7FFFFu;	// clear the quiet bit (bit 19 of the high word)
		if ((words[1] & 0x000FFFFFu) == 0 && words[0] == 0)
		    words[0] = 1;		// empty fraction would mean infinity
		memcpy(&c.Vdouble, words, sizeof(words));
	    }
	    break;

	case TYldouble:
	case TYildouble:
	    // No narrowing, so no patching is done for this case.
	    c.Vldouble = value;
	    break;

	default:
	    // Unexpected type: dump diagnostics, then fail hard.
	    print();
	    type->print();
	    type->toBasetype()->print();
	    printf("ty = %d, tym = %x\n", type->ty, ty);
	    assert(0);
     }
     return el_const(ty, &c);
}

/*
 * Turns the 32-bit float stored at p back into a signalling NaN after a
 * narrowing conversion has quieted it: clears the quiet bit (bit 22) and, if
 * the fraction would then be zero (which encodes infinity), sets the lowest
 * fraction bit. Works on the raw bytes via memcpy.
 */
static void complexRestoreSNaNFloat(void *p)
{
    unsigned int bits;
    memcpy(&bits, p, sizeof(bits));
    bits &= 0xFFBFFFFFu;
    if ((bits & 0x007FFFFFu) == 0)
	bits |= 1;
    memcpy(p, &bits, sizeof(bits));
}

/*
 * Same as complexRestoreSNaNFloat() for the 64-bit double stored at p.
 * words[1] is treated as the high half, as in the original code
 * (little-endian layout); its bit 19 is the quiet bit.
 */
static void complexRestoreSNaNDouble(void *p)
{
    unsigned int words[2];
    memcpy(words, p, sizeof(words));
    words[1] &= 0xFFF7FFFFu;
    if ((words[1] & 0x000FFFFFu) == 0 && words[0] == 0)
	words[0] = 1;
    memcpy(p, words, sizeof(words));
}

/*
 * Converts this complex literal into a backend constant.
 *
 * The real and imaginary parts are stored into the member of a zero-filled
 * 'union eve' selected by the basic type, and the result of el_const() for
 * that type and union is returned. The irs parameter is not used here.
 *
 * Narrowing to float or double quiets a signalling NaN, so each part for which
 * isSignallingNaN() is true is restored in place. The parts are handled
 * independently: previously nothing was restored unless BOTH parts were
 * signalling NaNs, so a value with only one signalling part lost it.
 */
elem *ComplexExp::toElem(IRState *irs)
{   union eve c;
     tym_t ty;
     real_t re;
     real_t im;

     re = creall(value);
     im = cimagl(value);

     memset(&c, 0, sizeof(c));
     ty = type->totym();
     switch (tybasic(ty))
     {
	case TYcfloat:
	    c.Vcfloat.re = (float) re;
	    c.Vcfloat.im = (float) im;
	    if (isSignallingNaN(re))
		complexRestoreSNaNFloat(&c.Vcfloat.re);
	    if (isSignallingNaN(im))
		complexRestoreSNaNFloat(&c.Vcfloat.im);
	    break;

	case TYcdouble:
	    c.Vcdouble.re = (double) re;
	    c.Vcdouble.im = (double) im;
	    if (isSignallingNaN(re))
		complexRestoreSNaNDouble(&c.Vcdouble.re);
	    if (isSignallingNaN(im))
		complexRestoreSNaNDouble(&c.Vcdouble.im);
	    break;

	case TYcldouble:
	    // No narrowing, so no patching is done for this case.
	    c.Vcldouble.re = re;
	    c.Vcldouble.im = im;
	    break;

	default:
	    assert(0);
     }
     return el_const(ty, &c);
}

Mar 18 2009

dennis luehring <dl.soluz gmx.net> writes:

i talked about the signaling NaN exception "feature" to coworkers 
yesterday - and was dreaming about a D support of them :-)

they are absolutely great if you are writing math code all the time - I'm 
getting tired of writing ::is_nan into my C++ functions to prevent chaos

Mar 19 2009

Don <nospam nospam.com> writes:

dennis luehring wrote:
 i talked about the signaling NaN exception "feature" to coworkers 
 yesterday - and was dreaming about a D support of them :-)
 
 they are absolutely great if you writing math code all the time - im 
 getting tired of writing ::is_nan into my c++ functions to prevent chaos

Yup, I've had many NaN nightmares. Whenever you have an uninitialized 
variable in C++ there's about a 1% chance that it's a NaN. That's really 
fun to track down. NOT.
I think it's criminal that this feature has been in hardware since *** 
1980 *** and not made available even in systems languages.

BTW, the line numbers I gave were for DMD2, but should also work for 
DMD1. Dual license Artistic/GPL, since it's from the DMD front-end.
Only tested on Windows, I believe it should be OK on Linux and OSX, but 
someone should check.

Mar 19 2009

Mason Green <mason.green gmail.com> writes:

Don wrote:
 Yup, I've had many NaN nightmares. Whenever you have an uninitialized 
 variable in C++ there's about a 1% chance that it's a NaN. That's really 
 fun to track down. NOT.
 I think it's criminal that this feature has been in hardware since *** 
 1980 *** and not made available even in systems languages.

I second adding this feature to D.  I've spent many, many hours chasing 
NaN bugs when porting box2d from c++.  Most of the time, it came down to 
simply forgetting to initialize a float somewhere. I actually made a 
post on dsource a few days ago:

http://www.dsource.org/forums/viewtopic.php?t=4501&sid=2e8f71e52efbcece43a63cbb261e8706

Mar 19 2009

"Nick Sabalausky" <a a.a> writes:

"Mason Green" <mason.green gmail.com> wrote in message 
news:gpt70t$2b5t$1 digitalmars.com...
 Don wrote:
 Yup, I've had many NaN nightmares. Whenever you have an uninitialized 
 variable in C++ there's about a 1% chance that it's a NaN. That's really 
 fun to track down. NOT.
 I think it's criminal that this feature has been in hardware since *** 
 1980 *** and not made available even in systems languages.

 I second adding this feature to D.  I've spent many, many hours chasing 
 NaN bugs when porting box2d from c++.  Most of the time, it came down to 
 simply forgetting to initialize a float somewhere. I actually make a post 
 on dsource a few days ago:

 http://www.dsource.org/forums/viewtopic.php?t=4501&sid=2e8f71e52efbcece43a63cbb261e8706

Warnings or errors on the use of uninitialized variables would also solve 
that problem *wink* *wink* *nudge* *nudge*.

Mar 19 2009

Walter Bright <newshound1 digitalmars.com> writes:

Pls add to bugzilla as an enhancement request!

Mar 19 2009

Don <nospam nospam.com> writes:

Walter Bright wrote:
 Pls add to bugzilla as an enhancement request!

2746.

Mar 19 2009

bearophile <bearophileHUGS lycos.com> writes:

Don:
 I propose that this should become part of DMD. It doesn't need to be in 
 the spec, it's primarily for debugging.

It may be OK in the specs too.

Bye,
bearophile

Mar 19 2009

Andrei Alexandrescu <SeeWebsiteForEmail erdani.org> writes:

Don wrote:
 It's great that D initializes floating-point variables to NaN, instead 
 of whatever random garbage happened to be in RAM.
 But, if your calculation ends up with a NaN, you have to work out where 
 it came from. Worse, the NaN might not necessarily
 be visible in your final results, but you results may nonetheless be wrong.

[...]

 I propose that this should become part of DMD. It doesn't need to be in 
 the spec, it's primarily for debugging.

That's terrific, Don. I've been bitten by NaNs in calculations many 
times. This would be an excellent improvement to D.

Andrei

Mar 19 2009

Georg Wrede <georg.wrede iki.fi> writes:

Don wrote:
              and AX, 0xFFFE; // enable invalid exception
              or AX, 0x1; // disable invalid exception

This being the least significant bit should show to everybody that the 
chipmakers consider this /absolutely fundamental/.

Don wrote:
 I think it's criminal that this feature has been in hardware since
 *** 1980 *** and not made available even in systems languages.

Some four years ago I tried convincing folks on this NG that this is 
absolutely needed. But I guess those were other times...

Don wrote:
 Walter Bright wrote:
 Pls add to bugzilla as an enhancement request!

 2746. 

Thanks, Don and Walter!!

Mar 19 2009

Walter Bright <newshound1 digitalmars.com> writes:

Georg Wrede wrote:
 Thanks, Don and Walter!!

I originally had this in the Digital Mars C compiler years and years 
ago. I dropped it and moved away from it because not a single person 
noticed or cared about it, plus the standard C spec failed to 
distinguish between quiet and signaling nan's.

I'm glad to see it actually being of value, and it's a great idea.

Mar 19 2009

Don <nospam nospam.com> writes:

Walter Bright wrote:
 Georg Wrede wrote:
 Thanks, Don and Walter!!

 
 I originally had this in the Digital Mars C compiler years and years 
 ago. I dropped it and moved away from it because not a single person 
 noticed or cared about it, plus the standard C spec failed to 
 distinguish between quiet and signaling nan's.
 
 I'm glad to see it actually being of value, and it's a great idea.

I noticed there was a place in the backend where it was careful to 
preserve the signallingness <g> of NaNs. So I thought, hmm, someone's 
been here before.

The key thing that makes it possible is that D initializes all floats to 
NaN anyway, and since C doesn't do that, it's not as obviously 
beneficial. By making NaN the default, you've made sure that practically 
every D user knows about them. I bet that's not the case for C/C++. (I 
even know numerical analysts who don't know much about them).

Mar 19 2009

Walter Bright <newshound1 digitalmars.com> writes:

Don wrote:
 Walter Bright wrote:
 Georg Wrede wrote:
 Thanks, Don and Walter!!

 I originally had this in the Digital Mars C compiler years and years 
 ago. I dropped it and moved away from it because not a single person 
 noticed or cared about it, plus the standard C spec failed to 
 distinguish between quiet and signaling nan's.

 I'm glad to see it actually being of value, and it's a great idea.

 
 I noticed there was a place in the backend where it was careful to 
 preserve the signallingness <g> of NaNs. So I thought, hmm, someone's 
 been here before.

Like a chicken that still has genes for dinosaur teeth lurking unactivated!

 
 The key thing that makes it possible is that D initializes all floats to 
 NaN anyway, and since C doesn't do that, it's not as obviously 
 beneficial. By making NaN the default, you've made sure that practically 
 every D user knows about them. I bet that's not the case for C/C++. (I 
 even know numerical analyists who don't know much about them).

You can still know it's not the case for C/C++ given the crappy support 
for nan in modern compilers.

Mar 19 2009

Fawzi Mohamed <fmohamed mac.com> writes:

Great work Don!
Fawzi

On 2009-03-19 05:58:40 +0100, Don <nospam nospam.com> said:

 It's great that D initializes floating-point variables to NaN, instead 
 of whatever random garbage happened to be in RAM.
 But, if your calculation ends up with a NaN, you have to work out where 
 it came from. Worse, the NaN might not necessarily
 be visible in your final results, but you results may nonetheless be wrong.
 
 The hardware has excellent support for debugging these problems -- all 
 you need to do is activitating the floating-point 'invalid' trap,
 and you'll get a hardware exception whenever you _create_ a NaN. What 
 about uninitialized variables?
 Signalling NaNs are designed for exactly this situation. The instant 
 you access a signalling NaN, a hardware exception occurs,
 and you drop straight into your debugger (just like accessing a null pointer).
 But this could only work if the compiler initialized every 
 uninitialised floating-point variable to a signalling NaN.
 
 Now that we have access to the backend (thanks Walter!), we can do 
 exactly that. My patch(below) is enabled only when compiled with DMC.
 real.nan is unchanged, and won't cause exceptions if you use it, but 
 real.init is now a signalling nan.
 This doesn't make any difference to anything, until you enable FP exceptions.
 And when you do, if no exceptions occur, you can use the code coverage 
 feature to give very high confidence that you're not using any 
 uninitialised floating-point variables.
 
 I propose that this should become part of DMD. It doesn't need to be in 
 the spec, it's primarily for debugging.
 
 Don.
 
 ==============
 Example of usage:
 ==============
 
 void main()
 {
      double a, b, c;
      a*=7;        // Exactly the same as it is now, a is nan.
 
      enableExceptions();
 
      c = 6;      // ok, c is initialized now
      c *= 10;
      b *= 10;    // BANG ! Straight into the debugger
      b *= 5;
 
      disableExceptions();
 }
 
 -------
 
 void enableExceptions() {
      version(D_InlineAsm_X86) {
           short cont;
           asm {
               fclex;
               fstcw cont;
               mov AX, cont;
               and AX, 0xFFFE; // enable invalid exception
               mov cont, AX;
               fldcw cont;
           }
       }
   }
 
 void disableExceptions() {
      version(D_InlineAsm_X86) {
           short cont;
           asm {
               fclex;
               fstcw cont;
               mov AX, cont;
               or AX, 0x1; // disable invalid exception
               mov cont, AX;
               fldcw cont;
           }
       }
   }
 
 
 =================================
 Patches to DMD to turn all unitialized floats into SNANs.
 
 Changes are in mytype.c and e2ir.c
 =================================
 mytype.c:
 =================================
 line 2150:
 
 Expression *TypeBasic::defaultInit(Loc loc)
 {   integer_t value = 0;
 #if __DMC__
 	// Note: could be up to 16 bytes long.
 	unsigned short snan[8] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xBFFF, 0x7FFF, 0, 0, 0 };
 	d_float80 fvalue = *(long double*)snan;
 #endif
 
 line 2177:
 
 	case Tfloat80:
 #if __DMC__
 		return new RealExp(loc, fvalue, this);
 #else
 	    return getProperty(loc, Id::nan);
 #endif
 
 line 2186:
 
 	case Tcomplex80:
 #if __DMC__
 		{   // Can't use fvalue + I*fvalue (the im part becomes a quiet NaN).
 			complex_t cvalue;
 			((real_t *)&cvalue)[0] = fvalue;
 			((real_t *)&cvalue)[1] = fvalue;
 			return new ComplexExp(loc, cvalue, this);
 		}
 #else
 	    return getProperty(loc, Id::nan);
 #endif
 =================================
 e2ir.c line 1182.
 =================================
 
 bool isSignallingNaN(real_t x)
 {
 #if __DMC__
 	if (x>=0 || x<0) return false;
 	return !((((unsigned short*)&x)[3])&0x4000);
 #else
 	return false;
 #endif
 }
 
 
 elem *RealExp::toElem(IRState *irs)
 {   union eve c;
      tym_t ty;
 
      //printf("RealExp::toElem(%p)\n", this);
      memset(&c, 0, sizeof(c));
      ty = type->toBasetype()->totym();
      switch (tybasic(ty))
      {
 	case TYfloat:
 	case TYifloat:
 	    c.Vfloat = value;
 		if (isSignallingNaN(value) ) {
 			((unsigned int*)&c.Vfloat)[0] &= 0xFFBFFFFFL;
 		}
 	    break;
 
 	case TYdouble:
 	case TYidouble:
 		c.Vdouble = value; // this unfortunately converts SNAN to QNAN.
 		if ( isSignallingNaN(value) ) {
 			((unsigned int*)&c.Vdouble)[1] &= 0xFFF7FFFFL;
 		}
 	    break;
 
 	case TYldouble:
 	case TYildouble:
 	    c.Vldouble = value;
 	    break;
 
 	default:
 	    print();
 	    type->print();
 	    type->toBasetype()->print();
 	    printf("ty = %d, tym = %x\n", type->ty, ty);
 	    assert(0);
      }
      return el_const(ty, &c);
 }
 
 elem *ComplexExp::toElem(IRState *irs)
 {   union eve c;
      tym_t ty;
      real_t re;
      real_t im;
 
      re = creall(value);
      im = cimagl(value);
 
      memset(&c, 0, sizeof(c));
      ty = type->totym();
      switch (tybasic(ty))
      {
 	case TYcfloat:
 	    c.Vcfloat.re = (float) re;
 	    c.Vcfloat.im = (float) im;
 		if ( isSignallingNaN(re) && isSignallingNaN(im)) {
 			((unsigned int*)&c.Vcfloat.re)[0] &= 0xFFBFFFFFL;
 			((unsigned int*)&c.Vcfloat.im)[0] &= 0xFFBFFFFFL;
 		}
 	    break;
 
 	case TYcdouble:
 	    c.Vcdouble.re = (double) re;
 	    c.Vcdouble.im = (double) im;
 		if ( isSignallingNaN(re) && isSignallingNaN(im)) {
 			((unsigned int*)&c.Vcdouble.re)[1] &= 0xFFF7FFFFL;
 			((unsigned int*)&c.Vcdouble.im)[1] &= 0xFFF7FFFFL;
 		}
 	    break;
 
 	case TYcldouble:
 	    c.Vcldouble.re = re;
 	    c.Vcldouble.im = im;
 	    break;
 
 	default:
 	    assert(0);
      }
      return el_const(ty, &c);
 }

Mar 20 2009

D Programming

C/C++ Programming

Other

digitalmars.D - The Joy of Signalling NaNs! (A compiler patch)