www.digitalmars.com         C & C++   DMDScript  

digitalmars.D - Conversion to string + string building benchmark

reply bearophile <bearophileHUGS lycos.com> writes:
About the versions:

This benchmark (test1/Test1) comes from a reduction of some code of mine, it
converts integer numbers to string and builds a single very long string with
them. I have seen the Java code significantly faster than the D one.

The successive benchmarks are experiments to better understand where the low
performance comes from:

test2a/Test2a just build the string, without integer conversions.

test2b/Test2b just convert ints to strings.

test3 is a try to perform a faster int to string conversion using the C library.

test4 is my faster D solution, it shows that there are ways to write a D
program faster than the Java code, but they aren't handy.

Timings, best of 3, seconds:
  D test1  10_000_000 7.38
  D test2a 10_000_000 5.91
  D test2b 10_000_000 2.65
  D test3  10_000_000 3.13
  D test4  10_000_000 1.25
  
  Java -Xmx500M -server Test1  10_000_000 1.74
  Java -Xmx500M -server Test2a 10_000_000 1.69
  Java -Xmx500M -server Test2b 10_000_000 1.67

And Java uses 2-byte long chars.

dmd -O -release -inline

--------------------------

// D program #1
import std.stdio, std.conv, std.array;

int test(int n) {
    Appender!string s;
    foreach (i; 0 .. n)
        s.put(to!string(i));
    return s.data.length;
}

void main(string[] args) {
    int n = args.length == 2 ? to!int(args[1]) : 100;
    writeln(test(n));
}

--------------------------

// Java program #1
final class Test1 {
    static int test(int n) {
        StringBuilder s = new StringBuilder();
        for (int i = 0; i < n; i++)
            s.append(i);
        return s.toString().length();
    }

    public static void main(String args[]) {
        int n = Integer.parseInt(args[0]);
        System.out.println(Test1.test(n));
    }
}

--------------------------

// D program #2a
import std.stdio, std.conv, std.array;

int test(int n) {
    Appender!string s;
    foreach (i; 0 .. n)
        s.put("12345678");
    return s.data.length;
}

void main(string[] args) {
    int n = args.length == 2 ? to!int(args[1]) : 100;
    writeln(test(n));
}


--------------------------

// Java program #2a
final class Test2a {
    static int test(int n) {
        StringBuilder s = new StringBuilder();
        for (int i = 0; i < n; i++)
            s.append("12345678");
        return s.toString().length();
    }

    public static void main(String args[]) {
        int n = Integer.parseInt(args[0]);
        System.out.println(Test1.test(n));
    }
}

--------------------------

// D program #2b
import std.stdio, std.conv, std.array;

int test(int n) {
    int totalLen = 0;
    foreach (i; 0 .. n)
        totalLen += to!string(i).length;
    return totalLen;
}

void main(string[] args) {
    int n = args.length == 2 ? to!int(args[1]) : 100;
    writeln(test(n));
}


--------------------------

// Java program #2b
final class Test2b {
    static int test(int n) {
        int totalLen = 0;
        for (int i = 0; i < n; i++)
            totalLen += Integer.toString(i).length();
        return totalLen;
    }

    public static void main(String args[]) {
        int n = Integer.parseInt(args[0]);
        System.out.println(Test1.test(n));
    }
}

--------------------------

// D program #3
import std.stdio, std.conv, std.array, core.stdc.stdio;

int test(int n) {
    Appender!string s;
    char[15] buffer;
    foreach (i; 0 .. n) {
        int len = sprintf(buffer.ptr, "%d", i);
        s.put(buffer[0 .. len]);
    }
    return s.data.length;
}

void main(string[] args) {
    int n = args.length == 2 ? to!int(args[1]) : 100;
    writeln(test(n));
}

--------------------------

// D program #4
import std.stdio, std.conv, std.array, core.stdc.stdio,
       core.stdc.stdlib, std.traits, core.exception, core.bitop;

uint nextPow2(uint n) {
    if (n == 0)
        return 1;
    int first_bit_set = bsr(n);
    if ((1 << first_bit_set) == n)
        return n;
    return 2 << first_bit_set;
}

bool isPow2(long x) {
    return (x < 1L) ? false : !(x & (x-1L));
}

struct ArrayBuilder(T, double FAST_GROW=2, double SLOW_GROW=1.3, int
BIG_MEM=(1<<27)/T.sizeof) {
    const int STARTING_SIZE = 4;

    static assert(FAST_GROW > 1);
    static assert(SLOW_GROW > 1);
    static assert(BIG_MEM > 1);
    static assert(FAST_GROW >= SLOW_GROW);

    T* data;
    int _length, _capacity;

    void opCatAssign(T[] array) {
        int newlen = this._length + array.length;

        if (newlen > this._capacity) {
            if (newlen < STARTING_SIZE)
                this._grow_capacity(STARTING_SIZE);
            else if (newlen > BIG_MEM)
                this._grow_capacity(cast(int)((newlen + 1) * SLOW_GROW));
            else
                this._grow_capacity(cast(int)(nextPow2(newlen + 1) *
FAST_GROW));
        }

        static if (isStaticArray!(T))
            this.data[this._length .. newlen][] = array[];
        else
            this.data[this._length .. newlen] = array[];

        this._length = newlen;
    }

    T[] toarray() {
        return this.data[0 .. this._length];
    }

    void _grow_capacity(int new_capacity) {
        assert(new_capacity >= 0, "ArrayBuilder!()._grow_capacity < 0 internal
error.");

        this.data = cast(T*)realloc(this.data, new_capacity * T.sizeof);
        if (this.data is null)
            throw new OutOfMemoryError();
        this._capacity = new_capacity;
    }
}

char[] fastUintToString(T)(T u) if (is(T == uint)) {
    if (u < 10)
        return (cast(char[])"0123456789")[u .. u + 1];
    else {
        static char[10] buffer = void;

        buffer[$ - 1] = cast(char)((u % 10) + '0');
        u /= 10;

        buffer[$ - 2] = cast(char)((u % 10) + '0');
        u /= 10;
        if (!u) return buffer[$-2 .. $];

        buffer[$ - 3] = cast(char)((u % 10) + '0');
        u /= 10;
        if (!u) return buffer[$-3 .. $];

        buffer[$ - 4] = cast(char)((u % 10) + '0');
        u /= 10;
        if (!u) return buffer[$-4 .. $];

        buffer[$ - 5] = cast(char)((u % 10) + '0');
        u /= 10;
        if (!u) return buffer[$-5 .. $];

        buffer[$ - 6] = cast(char)((u % 10) + '0');
        u /= 10;
        if (!u) return buffer[$-6 .. $];

        buffer[$ - 7] = cast(char)((u % 10) + '0');
        u /= 10;
        if (!u) return buffer[$-7 .. $];

        buffer[$ - 8] = cast(char)((u % 10) + '0');
        u /= 10;
        if (!u) return buffer[$-8 .. $];

        buffer[$ - 9] = cast(char)((u % 10) + '0');
        u /= 10;
        if (!u) return buffer[$-9 .. $];

        buffer[$ - 10] = cast(char)((u % 10) + '0');
        return buffer;
    }
}

int test(int n) {
    ArrayBuilder!char s;
    char[15] buffer;
    foreach (i; 0 .. n)
        s ~= fastUintToString(cast(uint)i);
    return s.toarray.length;
}

void main(string[] args) {
    int n = args.length == 2 ? to!int(args[1]) : 100;
    writeln(test(n));
}

--------------------------

Bye,
bearophile
Mar 17 2011
parent reply "Robert Jacques" <sandford jhu.edu> writes:
On Thu, 17 Mar 2011 11:06:34 -0400, bearophile <bearophileHUGS lycos.com>  
wrote:

 About the versions:

 This benchmark (test1/Test1) comes from a reduction of some code of  
 mine, it converts integer numbers to string and builds a single very  
 long string with them. I have seen the Java code significantly faster  
 than the D one.

 The successive benchmarks are experiments to better understand where the  
 low performance comes from:

 test2a/Test2a just build the string, without integer conversions.

 test2b/Test2b just convert ints to strings.

 test3 is a try to perform a faster int to string conversion using the C  
 library.

 test4 is my faster D solution, it shows that there are ways to write a D  
 program faster than the Java code, but they aren't handy.

Hi bearophile, Since I've been working on this problem recently, here's an analysis of what's happening: Both Appender and your test cases work by growing an array in a 'smart' manner. The issue with this approach is that once the arrays get big the conservative GC starts pinning them due to false pointers. This slows down the GC a lot (due to some naivety in the GC, which is being patched) and creates excessive memory usage. And I would hazard that Java's StringBuilder isn't giving you O(1) access to the underlying array like Appender is, which would allow it to drastically reduce memory churn. In the future, you should also include program ram usages in these kind of benchmarks.
Mar 19 2011
next sibling parent bearophile <bearophileHUGS lycos.com> writes:
Robert Jacques:

Happy to see my post was not fully lost in the noise :-)

 And I would  
 hazard that Java's StringBuilder isn't giving you O(1) access to the  
 underlying array like Appender is, which would allow it to drastically  
 reduce memory churn.

The first purpose of an Appender/builder is to build an array as fast as possible. I don't need a very access to the array while I build it (a Deque too allows O(1) too, just a bit slower).
 In the future, you should also include program ram usages in these kind of  
 benchmarks.

The amount of memory used/commited is less easy to measure precisely, here are approximated values, the result are weird: Timings, best of 3, n = 10_000_000, MB (commit): D test1: 290 D test2a: 186 D test2b: 1.9 D test3: 188 D test4: 106 Java -Xmx500M -server Test1: 355 Java -Xmx500M -server Test2a: 355 Java -Xmx500M -server Test2b: 355 Bye, bearophile
Mar 19 2011
prev sibling parent "Robert Jacques" <sandford jhu.edu> writes:
On Sat, 19 Mar 2011 22:14:50 -0400, bearophile <bearophileHUGS lycos.com>  
wrote:

 Robert Jacques:

 Happy to see my post was not fully lost in the noise :-)

 And I would
 hazard that Java's StringBuilder isn't giving you O(1) access to the
 underlying array like Appender is, which would allow it to drastically
 reduce memory churn.

The first purpose of an Appender/builder is to build an array as fast as possible. I don't need a very access to the array while I build it (a Deque too allows O(1) too, just a bit slower).
 In the future, you should also include program ram usages in these kind  
 of
 benchmarks.

The amount of memory used/commited is less easy to measure precisely, here are approximated values, the result are weird: Timings, best of 3, n = 10_000_000, MB (commit): D test1: 290 D test2a: 186 D test2b: 1.9 D test3: 188 D test4: 106 Java -Xmx500M -server Test1: 355 Java -Xmx500M -server Test2a: 355 Java -Xmx500M -server Test2b: 355 Bye, bearophile

I've filled this issue in bugzilla as issue 5813 (http://d.puremagic.com/issues/show_bug.cgi?id=5813) along with a patch I've developed.
Apr 03 2011