digitalmars.D.bugs - [Issue 3751] New: Optimalization error in some floating point code
- d-bugmail puremagic.com (297/297) Jan 28 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (6/6) Jan 28 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (10/10) Jan 28 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (48/49) Jan 29 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (17/17) Jan 29 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (22/30) Jan 29 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (21/29) Jan 29 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (11/31) Jan 29 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (15/19) Jan 29 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
- d-bugmail puremagic.com (17/33) Jan 29 2010 http://d.puremagic.com/issues/show_bug.cgi?id=3751
http://d.puremagic.com/issues/show_bug.cgi?id=3751
Summary: Optimalization error in some floating point code
Product: D
Version: 2.039
Platform: x86
OS/Version: Linux
Status: NEW
Severity: major
Priority: P2
Component: DMD
AssignedTo: nobody puremagic.com
ReportedBy: baryluk smp.if.uj.edu.pl
20:17:58 PST ---
I tested this in 2.039 and v2.028, so probably all version beetwen are
affected.
I don't know how far this bug was introducent.
Here is simplified test case (functionally it is buggy but it is sufficient to
show bug):
import std.math;
import std.stdio;
double bisect(double z) {
double left = -7.0, right = 7.0, half;
while (true) { // do {} while(true); // also have this problem
half = (left+right)*0.5;
version(something) {
writefln("%s", half); // adding this solves problem
}
if (left == half) {
return half; // or break
}
if (half == right) {
return half; // or break
}
/+
// the same effect as two if statments.
if ((left == half) || (half == right)) {
return half; // or break
}
+/
double fhalf = exp(-0.5*half*half) * (half + 0.7) - z;
if (fhalf > 0.0) {
right = half;
} else if (fhalf <= 0.0) {
left = half;
}
};
//return half; // not rechable, irrevelant
}
void main() {
foreach (i; 1 .. 10) {
auto x = 0.1 + 0.001*i;
writefln("%g %g", x, bisect(x));
}
}
compile without optimalisation:
0.101 -0.580467
0.102 -0.579361
0.103 -0.578256
0.104 -0.577153
0.105 -0.57605
0.106 -0.574949
0.107 -0.573849
0.108 -0.57275
0.109 -0.571653
compile with optimalisation:
0.101 -0.580467
.... nothing happens, CPU usage 100%
^C
compile with optimalisation and dummy write:
...
...
0.109 -0.571653
You can check asmbler code here:
version without optimalisations:
.text._D4blad6bisectFdZd segment
assume CS:.text._D4blad6bisectFdZd
_D4blad6bisectFdZd:
push EBP
mov EBP,ESP
sub ESP,020h
fld qword ptr FLAT:.rodata[08h]
fstp qword ptr -020h[EBP]
fld qword ptr FLAT:.rodata[019h]
fstp qword ptr -018h[EBP]
fld qword ptr FLAT:.rodata[02Ah]
fstp qword ptr -010h[EBP]
L21: fld qword ptr -020h[EBP]
fadd qword ptr -018h[EBP]
fmul qword ptr _TMP6 SYM32[09h]
fstp qword ptr -010h[EBP]
fld qword ptr -020h[EBP]
fld qword ptr -010h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jne L46
jp L46
fld qword ptr -010h[EBP]
leave
ret 8
L46: fld qword ptr -010h[EBP]
fld qword ptr -018h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jne L5C
jp L5C
fld qword ptr -010h[EBP]
leave
ret 8
L5C: fld qword ptr -010h[EBP]
fmul qword ptr _TMP6 SYM32[049h]
fmul qword ptr -010h[EBP]
sub ESP,8
fstp qword ptr [ESP]
call near ptr _D3std4math3expFNaNbdZd PC32
fld qword ptr -010h[EBP]
fadd qword ptr _TMP10 SYM32
fmulp ST(1),ST
fsub qword ptr 8[EBP]
fstp qword ptr -8[EBP]
fld qword ptr -8[EBP]
ftst
fstsw AX
sahf
fstp ST
jbe L98
fld qword ptr -010h[EBP]
fstp qword ptr -018h[EBP]
jmp short L21
L98: fld qword ptr -8[EBP]
ftst
fstsw AX
sahf
fstp ST
ja L21
jp L21
fld qword ptr -010h[EBP]
fstp qword ptr -020h[EBP]
jmp near ptr L21
nop
nop
nop
.text._D4blad6bisectFdZd ends
Version with optimalistion:
_D4blad6bisectFdZd:
push EBP
mov EBP,ESP
sub ESP,020h
mov dword ptr -010h[EBP],0
fld qword ptr FLAT:.rodata[0Fh]
fld qword ptr FLAT:.rodata[01Dh]
fxch ST1
mov dword ptr -0Ch[EBP],0
fstp qword ptr -020h[EBP]
fstp qword ptr -018h[EBP]
L28: fld qword ptr -010h[EBP]
fld qword ptr -018h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jne L40
jp L40
L37: fld qword ptr -010h[EBP]
mov ESP,EBP
pop EBP
ret 8
L40: fld qword ptr -010h[EBP]
fmul qword ptr _TMP6 SYM32[025h]
fmul qword ptr -010h[EBP]
sub ESP,8
fstp qword ptr [ESP]
call near ptr _D3std4math3expFNaNbdZd PC32
fld qword ptr -010h[EBP]
fadd qword ptr _TMP6 SYM32[044h]
fmulp ST(1),ST
fsub qword ptr 8[EBP]
fst qword ptr -8[EBP]
ftst
fstsw AX
fstp ST
sahf
jbe L93
fld qword ptr -010h[EBP]
fstp qword ptr -018h[EBP]
L77: fld qword ptr -020h[EBP]
fld ST0
fadd qword ptr -018h[EBP]
fmul qword ptr _TMP10 SYM32[09h]
fst qword ptr -010h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jp L28
je L37
jmp short L28
L93: fld qword ptr -8[EBP]
ftst
fstsw AX
fstp ST
sahf
ja L77
jp L77
fld qword ptr -010h[EBP]
fstp qword ptr -020h[EBP]
jmp short L77
nop
nop
nop
.text._D4blad6bisectFdZd ends
version with "something" added artifacialy:
.text._D4blad6bisectFdZd segment
assume CS:.text._D4blad6bisectFdZd
_D4blad6bisectFdZd:
push EBP
mov EBP,ESP
sub ESP,020h
mov dword ptr -010h[EBP],0
fld qword ptr _TMP0 SYM32[017h]
fld qword ptr _TMP0 SYM32[025h]
fxch ST1
mov dword ptr -0Ch[EBP],0
fstp qword ptr -020h[EBP]
fstp qword ptr -018h[EBP]
push dword ptr _TMP0 SYM32[02Eh]
push dword ptr _TMP0 SYM32[030h]
push 0
push 0
call near ptr ...writeflnTAyaTdZ8writeflnFAyadZv PC32
L3D: fld qword ptr -010h[EBP]
fld qword ptr -018h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jne L55
jp L55
L4C: fld qword ptr -010h[EBP]
mov ESP,EBP
pop EBP
ret 8
L55: fld qword ptr -010h[EBP]
fmul qword ptr _TMP7 SYM32[03Ah]
fmul qword ptr -010h[EBP]
sub ESP,8
fstp qword ptr [ESP]
call near ptr _D3std4math3expFNaNbdZd PC32
fld qword ptr -010h[EBP]
fadd qword ptr _TMP10 SYM32[09h]
fmulp ST(1),ST
fsub qword ptr 8[EBP]
fst qword ptr -8[EBP]
ftst
fstsw AX
fstp ST
sahf
jbe LCA
fld qword ptr -010h[EBP]
fstp qword ptr -018h[EBP]
L8C: push dword ptr _TMP10 SYM32[02h]
push dword ptr _TMP10 SYM32[04h]
fld qword ptr -020h[EBP]
fadd qword ptr -018h[EBP]
fmul qword ptr _TMP11 SYM32[028h]
fst qword ptr -010h[EBP]
sub ESP,8
fstp qword ptr [ESP]
call near ptr ...writeflnTAyaTdZ8writeflnFAyadZv PC32
fld qword ptr -020h[EBP]
fld qword ptr -010h[EBP]
fucompp ST(1),ST
fstsw AX
sahf
jp L3D
je L4C
jmp near ptr L3D
LCA: fld qword ptr -8[EBP]
ftst
fstsw AX
fstp ST
sahf
ja L8C
jp L8C
fld qword ptr -010h[EBP]
fstp qword ptr -020h[EBP]
jmp short L8C
.text._D4blad6bisectFdZd ends
What is interesting, that just adding single writefln makes this asembler code
change in much more places than just call of this function.
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jan 28 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751 20:24:33 PST --- exp(-0.5*half*half) can be change to 1.0/(half*half) and bug is still the same. -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jan 28 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751
Stephan Dilly <spam extrawurst.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |spam extrawurst.org
---
is this maybe related to issue 3736 ?
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jan 28 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751 04:28:20 PST ---is this maybe related to issue 3736 ?I don't know. This is quite different issues (i'm not using structs, and i doesn't call functions when error occure). My bug have probably something with FPU register allocation, but this is just a guess. I further reduced test case, change "write("%s",half)" to nic(), where nic is empty function, and write code directly in main import std.math; import std.stdio; void nic() {} void main() { foreach (i; 1 .. 10) { auto z = 0.1 + 0.001*i; double left = -7.0, right = 7.0, half; while (true) { half = (left+right)*0.5; // or left+0.5*(right-left); version(something) { nic(); // adding this solves problem } if (left == half) { break; // or break } if (half == right) { break; // or break } /+ // the same effect if ((left == half) || (half == right)) { return half; // or break } +/ double fhalf = 1.0/(half*half) * (half + 0.7) - z; if (fhalf > 0.0) { right = half; } else if (fhalf <= 0.0) { left = half; } }; writefln("%g %g", z, half); } } One of the dide by side diff (left -O, right -O + nic): http://pastebin.com/f17a77299 -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jan 29 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751
Don <clugdbug yahoo.com.au> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |clugdbug yahoo.com.au
I'm not certain that there's a bug here (difficult to tell without reducing the
test case further). It could be that in the optimised case you have a temporary
value stored in a register, increasing the precision. It's legal for the
compiler to do that. Adding a function call stops DMD from keeping the value in
a register.
Anyway you need to cut the test case down significantly.
BTW it behaves the same way on DMD0.175, so this is definitely not a
regression.
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jan 29 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751
Steven Schveighoffer <schveiguy yahoo.com> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |schveiguy yahoo.com
05:44:47 PST ---
I'm not certain that there's a bug here (difficult to tell without reducing the
test case further). It could be that in the optimised case you have a temporary
value stored in a register, increasing the precision. It's legal for the
compiler to do that. Adding a function call stops DMD from keeping the value in
a register.
Anyway you need to cut the test case down significantly.
BTW it behaves the same way on DMD0.175, so this is definitely not a
regression.
I think this is exactly the problem. You are using floating point expecting
that all floats are created equal.
One rule about floating point is never to build an infinite loop waiting for
two floats to converge using ==. You must use an epsilon if you expect any
reasonable result.
Your exit condition is this:
if (left == half || right == half)
What about trying this:
if(half - left < SOME_EPSILON || right - half < SOME_EPSILON)
where SOME_EPSILON is a very small insignificant number, like 0.000001
floating point convergence is an art form, you need to understand its
limitations.
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jan 29 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751 05:48:52 PST ---I'm not certain that there's a bug here (difficult to tell without reducing the test case further). It could be that in the optimised case you have a temporary value stored in a register, increasing the precision. It's legal for the compiler to do that. Adding a function call stops DMD from keeping the value in a register. Anyway you need to cut the test case down significantly. BTW it behaves the same way on DMD0.175, so this is definitely not a regression.Hmm, You are probably right. Changing coditions to: if (left <= half) { break; } if (half >= right) { break; } solves problem. Interesingly compiling in gdc even with -O3 -finline -frelease -funsafe-math-optimizations -frounding-math -ffast-math doesn't bring this error. What is also problematic is that even adding forcibly something like cast(double) doesn't solve problem (they are noop's, becuase they are already of correct type). -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jan 29 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751 05:50:47 PST ---I think this is exactly the problem. You are using floating point expecting that all floats are created equal. One rule about floating point is never to build an infinite loop waiting for two floats to converge using ==. You must use an epsilon if you expect any reasonable result.I know this.Your exit condition is this: if (left == half || right == half) What about trying this: if(half - left < SOME_EPSILON || right - half < SOME_EPSILON) where SOME_EPSILON is a very small insignificant number, like 0.000001 floating point convergence is an art form, you need to understand its limitations.Yes I know, but I was relaying of IEEE 754 semantic, in which my code should work (there will be point in execuion when left == half or right == half exactly!). I was just trying to write this routine to be slightly faster just by not using EPSILONS and substractions. -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jan 29 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751 06:06:41 PST ---Yes I know, but I was relaying of IEEE 754 semantic, in which my code should work (there will be point in execuion when left == half or right == half exactly!). I was just trying to write this routine to be slightly faster just by not using EPSILONS and substractions.Are you sure? I'm not really a floating point expert, so I don't know what the rules of IEEE are, but I have written floating point convergence algorithms for things like programming competitions, and I had to learn that things will get stuck if you don't use an epsilon. You can picture what's happening if you make left and right integers. What ends up happening when left - right == 1? half becomes left + .5, so it does not compare equal to left or right, but if your calculation then assigns left equal to half, the .5 is dropped and left is stuck in the same loop. I would say that this is not a bug, but again, not a floating point expert. -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jan 29 2010
http://d.puremagic.com/issues/show_bug.cgi?id=3751
Witold Baryluk <baryluk smp.if.uj.edu.pl> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|NEW |RESOLVED
Resolution| |INVALID
15:50:21 PST ---
Yes I know, but I was relaying of IEEE 754 semantic, in which my code should
work (there will be point in execuion when left == half or right == half
exactly!). I was just trying to write this routine to be slightly faster just
by not using EPSILONS and substractions.
Are you sure? I'm not really a floating point expert, so I don't know what the
rules of IEEE are, but I have written floating point convergence algorithms for
things like programming competitions, and I had to learn that things will get
stuck if you don't use an epsilon.
You can picture what's happening if you make left and right integers. What
ends up happening when left - right == 1? half becomes left + .5, so it does
not compare equal to left or right, but if your calculation then assigns left
equal to half, the .5 is dropped and left is stuck in the same loop.
Whell it can't be that "half is not equal to left", and then after assigning it
is actually the same value. It is only becuase we have intermidiate half in
higher precision than double precision.
I'm actually using formula half = left + 0.5*(right-left); which is safer in
regards of overflows, but this doesn't really metter.
I would say that this is not a bug, but again, not a floating point expert.
Closing it. Sorry for a problem.
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jan 29 2010









d-bugmail puremagic.com 