Welcome to Web-News
A Web-based News Reader
Subject More embarrassing microbenchmars for D's GC.
From Leandro Lucarella <llucax@gmail.com>
Date Mon, 9 Jun 2008 17:34:08 -0300
Newsgroups digitalmars.D
Attachment(s) list-test-c.clist-test-cpp.cpplist-test-d-gc.dlist-test-d.dlist-test-py.py

I've done a very simple microbenchmark testing a single linked list.

What the test does is fill the list with long elements, and then iterate
the list and increment the associated value. This test mostly test the
allocation speed.

I've done plain a C version, a C++ version (using STL double-linked list,
so it has some disvantage here), a D version without using the GC, a D
version using the GC, and a Python version.

Here are the result (Celeron ~2GHz, 1M elements, average from 3 runs):

       C         C++       D no gc   D gc       D gc dis  Python
Fill   0.136136  0.142705  0.131238  22.628577  0.242637  12.952816
Inc    0.025134  0.038768  0.037456   0.050480  0.051545   3.765271
Total  0.161270  0.181473  0.168694  22.679057  0.294182  16.718087

Results are in seconds, compiled with gcc/g++/gdc (-O3 -finline, plus
-frelease for D code). Using phobos (gdc 0.25-20080419-4.1.2-22, with
debian patches).

You can see Python is almost twice faster than D doing allocation (and the
python example has some intentional overhead to make it as close as D code
as possible, using more Pythonic code could yield better results).

But there are a few other results I can't explain:
1) Why is D gc (disabled or not) version ~25% slower than the D version
   that uses malloc when iterating the list? It shouldn't be any GC
   activity in that part. Could be some GC locallity issue that yields
   more cache misses?
2) Why is D malloc version ~33% slower than the C version? I took a look
   at the generated assembly and it's almost identical:
        <_Dmain+198>:   lea    -0x20(%ebp),%eax
        <_Dmain+201>:   lea    0x0(%esi,%eiz,1),%esi
        <_Dmain+208>:   addl   $0x1,0x8(%eax)
        <_Dmain+212>:   adcl   $0x0,0xc(%eax)
        <_Dmain+216>:   mov    (%eax),%eax
        <_Dmain+218>:   test   %eax,%eax
        <_Dmain+220>:   jne    0x804a240 <_Dmain+208>


        <main+248>:     lea    -0x1c(%ebp),%eax
        <main+251>:     nop
        <main+252>:     lea    0x0(%esi,%eiz,1),%esi
        <main+256>:     addl   $0x1,0x4(%eax)
        <main+260>:     adcl   $0x0,0x8(%eax)
        <main+264>:     mov    (%eax),%eax
        <main+266>:     test   %eax,%eax
        <main+268>:     jne    0x8048550 <main+256>
        <main+270>:     movl   $0x0,0x4(%esp)
        <main+278>:     movl   $0x8049800,(%esp)

Tests attached.

--
Leandro Lucarella (luca) | Blog colectivo: http://www.mazziblog.com.ar/blog/
----------------------------------------------------------------------------
GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145  104C 949E BFB6 5F5A 8D05)
----------------------------------------------------------------------------
RENUNCIO PARA IR A REZARLE A SAN CAYETANO
        -- Crónica TV



#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "timer.h"

struct node_t
{
        struct node_t* next;
        uint64_t       data;
};

int main(int argc, char* argv[])
{
        if (argc < 2)
                abort();

        int n = atol(argv[1]);

        timer_start();
        struct node_t list = { NULL, -1ull };
        struct node_t* it = &list;
        for (int i = 0; i < n; ++i)
        {
                it->data = i;
                it->next = (struct node_t*) malloc(sizeof(struct node_t));
                it = it->next;
        }
        it->next = NULL;
        unsigned long elapsed1 = timer_stop();

        timer_start();
        it = &list;
        while (it)
        {
                ++(it->data);
                it = it->next;
        }
        unsigned long elapsed2 = timer_stop();

        printf("Fill:  %lu usec\n", elapsed1);
        printf("Inc:   %lu usec\n", elapsed2);
        printf("Total: %lu usec\n", elapsed1 + elapsed2);
        return 0;
}




#include <stdint.h>
#include <list>
#include <stdio.h>
#include "timer.h"
#include <boost/foreach.hpp>

int main(int argc, char* argv[])
{
        if (argc < 2)
                abort();

        int n = atol(argv[1]);

        timer_start();
        std::list< uint64_t > list;
        for (int i = 0; i < n; ++i)
                list.push_back(i);
        unsigned long elapsed1 = timer_stop();

        timer_start();
        BOOST_FOREACH (uint64_t& data, list)
                ++data;
        unsigned long elapsed2 = timer_stop();

        printf("Fill:  %lu usec\n", elapsed1);
        printf("Inc:   %lu usec\n", elapsed2);
        printf("Total: %lu usec\n", elapsed1 + elapsed2);
        return 0;
}




module test;

import std.stdio;
import std.conv;
import std.gc;
import std.c.time;

alias int suseconds_t;

struct timeval {
        time_t      tv_sec;     /* seconds */
        suseconds_t tv_usec;    /* microseconds */
}

struct timezone {
        int tz_minuteswest;     /* minutes west of Greenwich */
        int tz_dsttime;         /* type of DST correction */
}

extern (C) int gettimeofday(timeval *tv, timezone *tz);

timeval tv_s, tv_e;

struct node_t
{
        node_t* next;
        ulong   data;
};

int main(char[][] args)
{
        if (args.length < 2)
                return 1;

        ulong n = toUlong(args[1]);

        //std.gc.disable(); "D gc dis" were with this line uncommented
        gettimeofday(&tv_s, null);
        node_t list = node_t(null, -1uL);
        node_t* it = &list;
        for (int i = 0; i < n; ++i)
        {
                it.data = i;
                it.next = new node_t;
                it = it.next;
        }
        it.next = null;
        gettimeofday(&tv_e, null);
        ulong elapsed1 = (tv_e.tv_sec - tv_s.tv_sec) * 1000000uL
                                + (tv_e.tv_usec - tv_s.tv_usec);

        gettimeofday(&tv_s, null);
        it = &list;
        while (it)
        {
                ++(it.data);
                it = it.next;
        }
        gettimeofday(&tv_e, null);
        ulong elapsed2 = (tv_e.tv_sec - tv_s.tv_sec) * 1000000uL
                                + (tv_e.tv_usec - tv_s.tv_usec);

        writefln("Fill:  ", elapsed1, " usecs");
        writefln("Inc:   ", elapsed2, " usecs");
        writefln("Total: ", elapsed1 + elapsed2, " usecs");
        return 0;
}




module test;

import std.stdio;
import std.c.stdlib;
import std.conv;
import std.c.time;

alias int suseconds_t;

struct timeval {
        time_t      tv_sec;     /* seconds */
        suseconds_t tv_usec;    /* microseconds */
}

struct timezone {
        int tz_minuteswest;     /* minutes west of Greenwich */
        int tz_dsttime;         /* type of DST correction */
}

extern (C) int gettimeofday(timeval *tv, timezone *tz);

timeval tv_s, tv_e;


struct node_t
{
        node_t* next;
        ulong   data;
};

int main(char[][] args)
{
        if (args.length < 2)
                return 1;

        ulong n = toUlong(args[1]);

        gettimeofday(&tv_s, null);
        node_t list = node_t(null, -1uL);
        node_t* it = &list;
        for (int i = 0; i < n; ++i)
        {
                it.data = i;
                it.next = cast(node_t*) malloc(node_t.sizeof);
                it = it.next;
        }
        it.next = null;
        gettimeofday(&tv_e, null);
        ulong elapsed1 = (tv_e.tv_sec - tv_s.tv_sec) * 1000000uL
                                + (tv_e.tv_usec - tv_s.tv_usec);


        gettimeofday(&tv_s, null);
        it = &list;
        while (it)
        {
                ++(it.data);
                it = it.next;
        }
        gettimeofday(&tv_e, null);
        ulong elapsed2 = (tv_e.tv_sec - tv_s.tv_sec) * 1000000uL
                                + (tv_e.tv_usec - tv_s.tv_usec);

        writefln("Fill:  ", elapsed1, " usecs");
        writefln("Inc:   ", elapsed2, " usecs");
        writefln("Total: ", elapsed1 + elapsed2, " usecs");
        return 0;
}





import sys
from datetime import datetime

#import psyco
#psyco.full()


class Node:
        def __init__(self):
                self.data = 0
                self.next = None

start = datetime.now()
list = Node()
n = list
for i in xrange(long(sys.argv[1])):
        n.data = i
        n.next = Node()
        n = n.next
n.next = None
end = datetime.now()
delta = datetime.now() - start
elapsed1 = delta.seconds * 1000000 + delta.microseconds

start = datetime.now()
p = list
while p:
        p.data += 1
        p = p.next
delta = datetime.now() - start
elapsed2 = delta.seconds * 1000000 + delta.microseconds

print "Fill:  %lu usec" % elapsed1
print "Inc:   %lu usec" % elapsed2
print "Total: %lu usec" % (elapsed1 + elapsed2)



Recent messages in this thread
 
-# More embarrassing microbenchmars for D's GC. (Current message) Leandro Lucarella 09-Jun-2008 04:34 pm
.-# Re: More embarrassing microbenchmars for D's GC. bearophile 09-Jun-2008 05:03 pm
.|-# Re: More embarrassing microbenchmars for D's GC. Leandro Lucarella 10-Jun-2008 09:42 am
.|.\# Re: More embarrassing microbenchmars for D's GC. bearophile 10-Jun-2008 10:34 am
.-# Re: More embarrassing microbenchmars for D's GC. Sean Kelly 09-Jun-2008 06:53 pm
.|-# Re: More embarrassing microbenchmars for D's GC. Leandro Lucarella 10-Jun-2008 10:28 am
.|.|# Re: More embarrassing microbenchmars for D's GC. Sean Kelly 10-Jun-2008 02:19 pm
.|.-# Re: More embarrassing microbenchmars for D's GC. Marius Muja 11-Jun-2008 12:38 am
.|..\# Re: More embarrassing microbenchmars for D's GC. Leandro Lucarella 11-Jun-2008 12:35 pm
.-# Re: More embarrassing microbenchmars for D's GC. Walter Bright 09-Jun-2008 08:12 pm
.|-# Re: More embarrassing microbenchmars for D's GC. Leandro Lucarella 10-Jun-2008 10:40 am
.|.-# Re: More embarrassing microbenchmars for D's GC. Walter Bright 10-Jun-2008 05:13 pm
.|..\# Re: More embarrassing microbenchmars for D's GC. Leandro Lucarella 11-Jun-2008 12:53 pm
.\# Re: More embarrassing microbenchmars for D's GC. Leandro Lucarella 10-Jun-2008 10:48 am