www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.bugs - [Issue 4952] New: One missing binary search for switch()

reply d-bugmail puremagic.com writes:
http://d.puremagic.com/issues/show_bug.cgi?id=4952

           Summary: One missing binary search for switch()
           Product: D
           Version: D1 & D2
          Platform: x86
        OS/Version: Windows
            Status: NEW
          Keywords: performance
          Severity: enhancement
          Priority: P2
         Component: DMD
        AssignedTo: nobody puremagic.com
        ReportedBy: bearophile_hugs eml.cc


--- Comment #0 from bearophile_hugs eml.cc 2010-09-28 12:38:15 PDT ---
I have compiled a D program with dmd and a C program with gcc and llvm-gcc, it
shows how compilers implement one common switch() situation. gcc and llvm-gcc
use a binary search, dmd a linear one.


// D code
import std.c.stdio: puts;
import std.c.stdlib: atoi;

void f1() { puts("f1 called"); }
void f2() { puts("f2 called"); }
void f3() { puts("f3 called"); }

void main() {
    int i = atoi("3");

    switch (i) {
        case 140: f1(); break;
        case 300: f1(); break;
        case 1280: f1(); break;
        case 1540: f1(); break;
        case 1660: f1(); break;
        case 1770: f2(); break;
        case 2150: f2(); break;
        case 2190: f1(); break;
        case 2530: f2(); break;
        case 2560: f2(); break;
        case 2590: f1(); break;
        case 2660: f1(); break;
        case 2720: f2(); break;
        case 3010: f1(); break;
        case 3100: f1(); break;
        case 3390: f2(); break;
        case 3760: f1(); break;
        case 3970: f2(); break;
        case 4050: f1(); break;
        case 4140: f1(); break;
        case 4360: f2(); break;
        case 4540: f1(); break;
        case 4600: f2(); break;
        case 4720: f2(); break;
        case 4730: f2(); break;
        case 4740: f2(); break;
        case 4880: f2(); break;
        case 4950: f1(); break;

        default: f3();
    }
}



// C code
#include "stdio.h"
#include "stdlib.h"

void f1() { puts("f1 called"); }
void f2() { puts("f2 called"); }
void f3() { puts("f3 called"); }

int main() {
    int i = atoi("1000");

    switch (i) {
        case 140: f1(); break;
        case 300: f1(); break;
        case 1280: f1(); break;
        case 1540: f1(); break;
        case 1660: f1(); break;
        case 1770: f2(); break;
        case 2150: f2(); break;
        case 2190: f1(); break;
        case 2530: f2(); break;
        case 2560: f2(); break;
        case 2590: f1(); break;
        case 2660: f1(); break;
        case 2720: f2(); break;
        case 3010: f1(); break;
        case 3100: f1(); break;
        case 3390: f2(); break;
        case 3760: f1(); break;
        case 3970: f2(); break;
        case 4050: f1(); break;
        case 4140: f1(); break;
        case 4360: f2(); break;
        case 4540: f1(); break;
        case 4600: f2(); break;
        case 4720: f2(); break;
        case 4730: f2(); break;
        case 4740: f2(); break;
        case 4880: f2(); break;
        case 4950: f1(); break;

        default: f3();
    }

    return 0;
}


------------------------------

DMD 2.49, optimized build:

__Dmain comdat
        push    EBX
        mov EAX,offset FLAT:_DATA[024h]
        push    EAX
        call    near ptr _atoi
        add ESP,4
        cmp EAX,08Ch
        je  L115
        cmp EAX,012Ch
        je  L115
        cmp EAX,0500h
        je  L115
        cmp EAX,0604h
        je  L115
        cmp EAX,067Ch
        je  L115
        cmp EAX,06EAh
        je  L105
        cmp EAX,0866h
        je  L105
        cmp EAX,088Eh
        je  L115
        cmp EAX,09E2h
        je  L105
        cmp EAX,0A00h
        je  L105
        cmp EAX,0A1Eh
        je  L115
        cmp EAX,0A64h
        je  L115
        cmp EAX,0AA0h
        je  L105
        cmp EAX,0BC2h
        je  L115
        cmp EAX,0C1Ch
        je  L115
        cmp EAX,0D3Eh
        je  L105
        cmp EAX,0EB0h
        je  L115
        cmp EAX,0F82h
        je  L105
        cmp EAX,0FD2h
        je  L115
        cmp EAX,0102Ch
        je  L115
        cmp EAX,01108h
        je  L105
        cmp EAX,011BCh
        je  L115
        cmp EAX,011F8h
        je  L105
        cmp EAX,01270h
        je  L105
        cmp EAX,0127Ah
        je  L105
        cmp EAX,01284h
        je  L105
        cmp EAX,01310h
        je  L105
        cmp EAX,01356h
        je  L115
        jmp short   L125
L105:       mov ECX,offset FLAT:_DATA[0Ch]
        push    ECX
        call    near ptr _puts
        add ESP,4
        jmp short   L133
L115:       mov EDX,offset FLAT:_DATA
        push    EDX
        call    near ptr _puts
        add ESP,4
        jmp short   L133
L125:       mov EBX,offset FLAT:_DATA[018h]
        push    EBX
        call    near ptr _puts
        add ESP,4
L133:       pop EBX
        xor EAX,EAX
        ret

----------------------------------

GCC 4.5.1, -O3

_main:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $16, %esp
    call    ___main
    movl    $LC3, (%esp)
    call    _atoi
    cmpl    $3010, %eax
    je  L33
    jle L43
    cmpl    $4360, %eax
    je  L32
    .p2align 4,,6
    jle L44
    cmpl    $4730, %eax
    je  L32
    .p2align 4,,6
    jle L45
    cmpl    $4880, %eax
    je  L32
    cmpl    $4950, %eax
    je  L33
    cmpl    $4740, %eax
    jne L5
    .p2align 4,,7
L32:
    movl    $LC1, (%esp)
    call    _puts
    xorl    %eax, %eax
    leave
LCFI16:
    ret
    .p2align 4,,7
L45:
LCFI17:
    cmpl    $4600, %eax
    je  L32
    cmpl    $4720, %eax
    je  L32
    cmpl    $4540, %eax
    jne L5
    .p2align 4,,7
L33:
    movl    $LC0, (%esp)
    call    _puts
L41:
    xorl    %eax, %eax
    leave
LCFI18:
    ret
    .p2align 4,,7
L43:
LCFI19:
    cmpl    $2150, %eax
    je  L32
    .p2align 4,,4
    jle L46
    cmpl    $2560, %eax
    je  L32
    .p2align 4,,6
    jle L47
    cmpl    $2660, %eax
    je  L33
    cmpl    $2720, %eax
    je  L32
    cmpl    $2590, %eax
    jne L5
    jmp L33
    .p2align 4,,7
L44:
    cmpl    $3760, %eax
    je  L33
    .p2align 4,,6
    jle L48
    cmpl    $4050, %eax
    je  L33
    cmpl    $4140, %eax
    je  L33
    cmpl    $3970, %eax
    jne L5
    jmp L32
    .p2align 4,,7
L46:
    cmpl    $1280, %eax
    je  L33
    .p2align 4,,6
    jle L49
    cmpl    $1660, %eax
    je  L33
    cmpl    $1770, %eax
    je  L32
    cmpl    $1540, %eax
    jne L5
    jmp L33
    .p2align 4,,7
L47:
    cmpl    $2190, %eax
    je  L33
    cmpl    $2530, %eax
    je  L32
    .p2align 4,,7
L5:
    movl    $LC2, (%esp)
    call    _puts
    jmp L41
    .p2align 4,,7
L49:
    cmpl    $140, %eax
    je  L33
    cmpl    $300, %eax
    jne L5
    jmp L33
    .p2align 4,,7
L48:
    cmpl    $3100, %eax
    je  L33
    cmpl    $3390, %eax
    jne L5
    jmp L32

---------------------------

llvm-gcc V.2.7, -O3

_main:
    pushl   %ebp
    movl    %esp, %ebp
    subl    $8, %esp
    call    ___main
    movl    $L_.str3, (%esp)
    call    _atoi
    cmpl    $299, %eax
    jg  LBB4_4
    cmpl    $140, %eax
    jne LBB4_56
LBB4_2:
    movl    $L_.str2, (%esp)
LBB4_3:
    call    _puts
    xorl    %eax, %eax
    addl    $8, %esp
    popl    %ebp
    ret
LBB4_4:
    cmpl    $1279, %eax
    jg  LBB4_6
    cmpl    $300, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_6:
    cmpl    $1539, %eax
    jg  LBB4_8
    cmpl    $1280, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_8:
    cmpl    $1659, %eax
    jg  LBB4_10
    cmpl    $1540, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_10:
    cmpl    $1769, %eax
    jg  LBB4_12
    cmpl    $1660, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_12:
    cmpl    $2149, %eax
    jg  LBB4_15
    cmpl    $1770, %eax
    jne LBB4_56
LBB4_14:
    movl    $L_.str1, (%esp)
    jmp LBB4_3
LBB4_15:
    cmpl    $4949, %eax
    jg  LBB4_55
    cmpl    $4879, %eax
    jg  LBB4_54
    cmpl    $2189, %eax
    jg  LBB4_19
    cmpl    $2150, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_19:
    cmpl    $2529, %eax
    jg  LBB4_21
    cmpl    $2190, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_21:
    cmpl    $2559, %eax
    jg  LBB4_23
    cmpl    $2530, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_23:
    cmpl    $2589, %eax
    jg  LBB4_25
    cmpl    $2560, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_25:
    cmpl    $2659, %eax
    jg  LBB4_27
    cmpl    $2590, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_27:
    cmpl    $2719, %eax
    jg  LBB4_29
    cmpl    $2660, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_29:
    cmpl    $3009, %eax
    jg  LBB4_31
    cmpl    $2720, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_31:
    cmpl    $3099, %eax
    jg  LBB4_33
    cmpl    $3010, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_33:
    cmpl    $3389, %eax
    jg  LBB4_35
    cmpl    $3100, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_35:
    cmpl    $3759, %eax
    jg  LBB4_37
    cmpl    $3390, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_37:
    cmpl    $3969, %eax
    jg  LBB4_39
    cmpl    $3760, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_39:
    cmpl    $4049, %eax
    jg  LBB4_41
    cmpl    $3970, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_41:
    cmpl    $4139, %eax
    jg  LBB4_43
    cmpl    $4050, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_43:
    cmpl    $4359, %eax
    jg  LBB4_45
    cmpl    $4140, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_45:
    cmpl    $4539, %eax
    jg  LBB4_47
    cmpl    $4360, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_47:
    cmpl    $4599, %eax
    jg  LBB4_49
    cmpl    $4540, %eax
    je  LBB4_2
    jmp LBB4_56
LBB4_49:
    cmpl    $4719, %eax
    jg  LBB4_51
    cmpl    $4600, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_51:
    cmpl    $4720, %eax
    je  LBB4_14
    cmpl    $4730, %eax
    je  LBB4_14
    cmpl    $4740, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_54:
    cmpl    $4880, %eax
    je  LBB4_14
    jmp LBB4_56
LBB4_55:
    cmpl    $4950, %eax
    je  LBB4_2
LBB4_56:
    movl    $L_.str, (%esp)
    jmp LBB4_3

------------------------------------

A situation like this may be handled with a mixed strategy, a table search for
the values in 3...7 and a binary search for the values in 100...900:


// C code
#include "stdio.h"
#include "stdlib.h"

void f1() { puts("f1 called"); }
void f2() { puts("f2 called"); }
void f3() { puts("f3 called"); }

int main() {
    int i = atoi("1000");
    switch (i) {
        case 3: f1(); break;
        case 5: f2(); break;
        case 6: f2(); break;
        case 7: f2(); break;
        case 100: f1(); break;
        case 200: f2(); break;
        case 250: f2(); break;
        case 700: f2(); break;
        case 750: f2(); break;
        case 900: f2(); break;

        default: f3();
    }
    return 0;
}

-- 
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Sep 28 2010
parent d-bugmail puremagic.com writes:
http://d.puremagic.com/issues/show_bug.cgi?id=4952



--- Comment #1 from bearophile_hugs eml.cc 2010-09-28 18:32:31 PDT ---
Some simple benchmark code, for comparison:

Timings, NLOOPS = 100_000, best of 6, seconds:
  DMD: 7.70
  GCC: 2.42

gcc  4.5.1, -Wall -O3 -s
dmd 2.049, -O -release -inline


// D code
import std.c.stdio: printf;

enum int NLOOPS = 100000;

int c1, c2, c3;

void f1() { c1++; }
void f2() { c2++; }
void f3() { c3++; }

int main() {
    int i, j;
    for (i = 0; i < NLOOPS; i++) {
        for (j = 0; j < 5000; j++) {
            switch (j) {
                case 140: f1(); break;
                case 300: f1(); break;
                case 1280: f1(); break;
                case 1540: f1(); break;
                case 1660: f1(); break;
                case 1770: f2(); break;
                case 2150: f2(); break;
                case 2190: f1(); break;
                case 2530: f2(); break;
                case 2560: f2(); break;
                case 2590: f1(); break;
                case 2660: f1(); break;
                case 2720: f2(); break;
                case 3010: f1(); break;
                case 3100: f1(); break;
                case 3390: f2(); break;
                case 3760: f1(); break;
                case 3970: f2(); break;
                case 4050: f1(); break;
                case 4140: f1(); break;
                case 4360: f2(); break;
                case 4540: f1(); break;
                case 4600: f2(); break;
                case 4720: f2(); break;
                case 4730: f2(); break;
                case 4740: f2(); break;
                case 4880: f2(); break;
                case 4950: f1(); break;

                default: f3();
            }
        }
    }

    printf("%d %d %d\n", c1, c2, c3);
    return 0;
}



// C code
#include "stdio.h"

#define NLOOPS 100000

int c1, c2, c3;

void f1() { c1++; }
void f2() { c2++; }
void f3() { c3++; }

int main() {
    int i, j;
    for (i = 0; i < NLOOPS; i++) {
        for (j = 0; j < 5000; j++) {
            switch (j) {
                case 140: f1(); break;
                case 300: f1(); break;
                case 1280: f1(); break;
                case 1540: f1(); break;
                case 1660: f1(); break;
                case 1770: f2(); break;
                case 2150: f2(); break;
                case 2190: f1(); break;
                case 2530: f2(); break;
                case 2560: f2(); break;
                case 2590: f1(); break;
                case 2660: f1(); break;
                case 2720: f2(); break;
                case 3010: f1(); break;
                case 3100: f1(); break;
                case 3390: f2(); break;
                case 3760: f1(); break;
                case 3970: f2(); break;
                case 4050: f1(); break;
                case 4140: f1(); break;
                case 4360: f2(); break;
                case 4540: f1(); break;
                case 4600: f2(); break;
                case 4720: f2(); break;
                case 4730: f2(); break;
                case 4740: f2(); break;
                case 4880: f2(); break;
                case 4950: f1(); break;

                default: f3();
            }
        }
    }

    printf("%d %d %d\n", c1, c2, c3);
    return 0;
}

-- 
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Sep 28 2010