## digitalmars.D.bugs - Bug in asm version of std.math.poly since 0.143

Grzegorz Adam Hankiewicz <fake dont.use> writes:
```The following program, as extracted from phobos, outputs:

\$ ./asmbug
The result should be 215.130000
The C version outputs 215.130000
The asm version outputs 56.100000

I don't know enough asm to solve this. Test program:

import std.stdio;

real poly_asm(real x, real[] A)
in
{
assert(A.length > 0);
}
body
{
version (D_InlineAsm_X86)
{
asm     // assembler by W. Bright
{
// EDX = (A.length - 1) * real.sizeof
mov     ECX,A[EBP]          ; // ECX = A.length
dec     ECX                 ;
lea     EDX,[ECX][ECX*8]    ;
fld     real ptr [EDX]      ; // ST0 = coeff[ECX]
jecxz   return_ST           ;
fld     x[EBP]              ; // ST0 = x
fxch    ST(1)               ; // ST1 = x, ST0 = r
align   4                   ;
L2:     fmul    ST,ST(1)            ; // r *= x
fld     real ptr -10[EDX]   ;
sub     EDX,10              ; // deg--
dec     ECX                 ;
jne     L2                  ;
fxch    ST(1)               ; // ST1 = r, ST0 = x
fstp    ST(0)               ; // dump x
align   4                   ;
return_ST:                          ;
;
}
}
else
{
writefl("Sorry, you don't seem to have InlineAsm_X86");
return 0;
}
}

real poly_c(real x, real[] A)
in
{
assert(A.length > 0);
}
body
{
int i = A.length - 1;
real r = A[i];
while (--i >= 0)
{
r *= x;
r += A[i];
}
return r;
}

int main()
{
real x = 3.1;
static real pp[] = [56.1, 32.7, 6];

writefln("The result should be %f",(56.1L + (32.7L + 6L * x) * x));
writefln("The C version outputs %f", poly_c(x, pp));
writefln("The asm version outputs %f", poly_asm(x, pp));

return 0;
}
```
Feb 04 2006
Don Clugston <dac nospam.com.au> writes:
```The code works correctly on DMD-Windows 0.145.
I think the problem is the extra alignment for reals  on Linux (12
bytes) compared to Windows (10 bytes). Just a guess, but I think that
this line:
sub     EDX,10              ; // deg--

should be
sub     EDX,12              ; // deg--

on Linux.
This also needs to be changed:
lea     EDX,[ECX][ECX*8]    ;

to
lea     EDX,[ECX][ECX*4]    ;

which isn't optimal, but see if it works.

If that doesn't work, try changing the line
fld     real ptr -10[EDX]   ;

to
fld     real ptr -12[EDX]   ;

Grzegorz Adam Hankiewicz wrote:
The following program, as extracted from phobos, outputs:

\$ ./asmbug
The result should be 215.130000
The C version outputs 215.130000
The asm version outputs 56.100000

I don't know enough asm to solve this. Test program:

import std.stdio;

real poly_asm(real x, real[] A)
in
{
assert(A.length > 0);
}
body
{
version (D_InlineAsm_X86)
{
asm     // assembler by W. Bright
{
// EDX = (A.length - 1) * real.sizeof
mov     ECX,A[EBP]          ; // ECX = A.length
dec     ECX                 ;
lea     EDX,[ECX][ECX*8]    ;
fld     real ptr [EDX]      ; // ST0 = coeff[ECX]
jecxz   return_ST           ;
fld     x[EBP]              ; // ST0 = x
fxch    ST(1)               ; // ST1 = x, ST0 = r
align   4                   ;
L2:     fmul    ST,ST(1)            ; // r *= x
fld     real ptr -10[EDX]   ;
sub     EDX,10              ; // deg--
dec     ECX                 ;
jne     L2                  ;
fxch    ST(1)               ; // ST1 = r, ST0 = x
fstp    ST(0)               ; // dump x
align   4                   ;
return_ST:                          ;
;
}
}
else
{
writefl("Sorry, you don't seem to have InlineAsm_X86");
return 0;
}
}

real poly_c(real x, real[] A)
in
{
assert(A.length > 0);
}
body
{
int i = A.length - 1;
real r = A[i];
while (--i >= 0)
{
r *= x;
r += A[i];
}
return r;
}

int main()
{
real x = 3.1;
static real pp[] = [56.1, 32.7, 6];

writefln("The result should be %f",(56.1L + (32.7L + 6L * x) * x));
writefln("The C version outputs %f", poly_c(x, pp));
writefln("The asm version outputs %f", poly_asm(x, pp));

return 0;
}

```
Feb 06 2006
Grzegorz Adam Hankiewicz <fake dont.use> writes:
```The Mon, 06 Feb 2006 09:28:33 +0100, Don Clugston wrote:
The code works correctly on DMD-Windows 0.145.  I think the problem
is the extra alignment for reals on Linux (12 bytes) compared to
Windows (10 bytes). Just a guess, but I think that this line:
>                 sub     EDX,10              ; // deg--
should be
>                 sub     EDX,12              ; // deg--
on Linux.

This also needs to be changed:
>                 lea     EDX,[ECX][ECX*8]    ;
>                 add     EDX,ECX             ;
to
>                 lea     EDX,[ECX][ECX*4]    ;
>                 add     EDX,ECX             ;