www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.learn - my first D program (and benchmark against perl)

reply perlancar <perlancar gmail.com> writes:
Here's my first non-hello-world D program, which is a direct 
translation from the Perl version. I was trying to get a feel 
about D's performance:

---BEGIN asciitable.d---
import std.string;
import std.stdio;

/**
 * Renders `table` as an ASCII table.
 *
 * Every row becomes one line of the form `|cell|cell|...|` terminated by
 * "\n". Each cell is left-justified and space-padded to the length of the
 * longest cell found in its column.
 *
 * Rows may have different numbers of cells: a column's width is computed
 * over the rows that actually contain that column, and each row only
 * emits its own cells.
 *
 * Params:
 *     table = rows of cells. Taken by `ref` to keep the original
 *             signature; the table is only read, never modified.
 *
 * Returns: the formatted table, or "" when `table` has no rows.
 */
string fmttable(ref string[][] table) {
     if (table.length == 0) return "";

     // column widths, measured with .length exactly like the cells
     size_t[] widths;

     foreach (row; table) {
         // Grow on demand so a row longer than the first one can never
         // index past the end of widths.
         if (row.length > widths.length)
             widths.length = row.length;

         // Iterate the row's own cells so a short row is never indexed
         // out of range.
         foreach (colnum, cell; row) {
             if (cell.length > widths[colnum])
                 widths[colnum] = cell.length;
         }
     }

     string res = "";

     foreach (row; table) {
         res ~= "|";
         foreach (colnum, cell; row) {
             res ~= leftJustify(cell, widths[colnum]);
             res ~= "|";
         }
         res ~= "\n";
     }

     return res;
}

/// Benchmark driver: prints the formatted sample table once, then formats
/// the same table a million more times, discarding each result.
void main() {
     // Number of timed fmttable calls after the initial printed one.
     enum iterations = 1_000_000;

     // tiny table (1x1)
     /*
     string[][] table = [
         ["row1.1"],
     ];
     */

     // small table (3x5)
     string[][] table = [
         ["row1.1", "row1.2  ", "row1.3"],
         ["row2.1", "row2.2", "row2.3"],
         ["row3.1", "row3.2", "row3.3  "],
         ["row4.1", "row4.2", "row4.3"],
         ["row5.1", "row5.2", "row5.3"],
     ];

     write(fmttable(table));

     foreach (i; 0 .. iterations) {
         fmttable(table);
     }
}
---END asciitable.d---

Perl version:

---BEGIN asciitable.pl---


# Format a table (an array ref of array refs of strings) as ASCII text.
# Every row becomes one "|cell|cell|...|" line terminated by "\n"; each cell
# is left-justified and space-padded to the longest cell in its column.
# Column widths are computed over the columns of the first row.
# Returns "" for a table with no rows.
sub fmttable {
     my $table = shift;

     my $res = "";

     # column widths
     my @widths;

     if (@$table == 0) { return "" }

     for my $colnum (0..$#{$table->[0]}) {
         my $width = 0;
         for my $rownum (0..$#$table) {
             if (length($table->[$rownum][$colnum]) > $width) {
                 $width = length($table->[$rownum][$colnum]);
             }
         }
         $widths[$colnum] = $width;
     }

     for my $rownum (0..$#$table) {
         $res .= "|";
         for my $colnum (0..$#{$table->[$rownum]}) {
             # "%-Ns" left-justifies the cell in a field of N characters.
             $res .= sprintf("%-".$widths[$colnum]."s|",
                             $table->[$rownum][$colnum]);
         }
         $res .= "\n";
     }
     $res;
}


# Benchmark driver: print the formatted table once, then format the same
# table a million more times, discarding each result.

# tiny table (1x1)
#my $table = [["row1.1"]];

# small table (3x5)
my $table = [
     ["row1.1", "row1.2", "row1.3"],
     ["row2.1", "row2.2  ", "row2.3"],
     ["row3.1", "row3.2", "row3.3  "],
     ["row4.1", "row4.2", "row4.3"],
     ["row5.1", "row5.2", "row5.3"],
];

my $iterations = 1_000_000;

print fmttable($table);

fmttable($table) for 1 .. $iterations;
---END asciitable.pl---

While I am quite impressed with how easily I was able to write D, I 
am not so impressed with the performance. Using rdmd (build 
20151103), the D program runs in 17.127s while the Perl version 
runs in 11.391s (so the D version is quite a bit *slower* than 
Perl's). While using gdc (Debian 4.9.2-10), I am able to run it 
in 3.988s (only about 3x faster than Perl's version).

I understand that string processing (concatenation, allocation) 
is quite optimized in Perl, I was wondering if the D version 
could still be sped up significantly?
Nov 11 2015
next sibling parent Edwin van Leeuwen <edder tkwsping.nl> writes:
On Wednesday, 11 November 2015 at 13:32:00 UTC, perlancar wrote:
     for (int rownum=0; rownum < table.length; rownum++) {
         res ~= "|";
         for (int colnum=0; colnum < table[rownum].length; 
 colnum++) {
             res ~= leftJustify(table[rownum][colnum], 
 widths[colnum]);
             res ~= "|";
         }
         res ~= "\n";
Not sure if this will be faster, but you could try rewriting the above for loop with more functional code (code below is untested):

table.map!((col) {
    return zip(col, widths)
        .map!((e) => leftJustify(e[0], e[1]))
        .join("|");
}).join("\n");

Cheers, Edwin
Nov 11 2015
prev sibling next sibling parent reply Rikki Cattermole <alphaglosined gmail.com> writes:
On 12/11/15 2:31 AM, perlancar wrote:
 Here's my first non-hello-world D program, which is a direct translation
 from the Perl version. I was trying to get a feel about D's performance:

 ---BEGIN asciitable.d---
 import std.string;
 import std.stdio;

 string fmttable(ref string[][] table) {
      string res = "";

      // column widths
      int[] widths;

      if (table.length == 0) return "";

      widths.length = table[0].length;

      for (int colnum=0; colnum < table[0].length; colnum++) {
          int width = 0;
          for (int rownum=0; rownum < table.length; rownum++) {
              if (table[rownum][colnum].length > width)
                  width = cast(int) table[rownum][colnum].length;
          }
          widths[colnum] = width;
      }

      for (int rownum=0; rownum < table.length; rownum++) {
          res ~= "|";
          for (int colnum=0; colnum < table[rownum].length; colnum++) {
              res ~= leftJustify(table[rownum][colnum], widths[colnum]);
              res ~= "|";
          }
          res ~= "\n";
      }

      return res;
 }

 void main() {
      // tiny table (1x1)
      /*
      string[][] table = [
          ["row1.1"],
      ];
      */

      // small table (3x5)
      string[][] table = [
          ["row1.1", "row1.2  ", "row1.3"],
          ["row2.1", "row2.2", "row2.3"],
          ["row3.1", "row3.2", "row3.3  "],
          ["row4.1", "row4.2", "row4.3"],
          ["row5.1", "row5.2", "row5.3"],
      ];

      write(fmttable(table));
      for (int i=0; i < 1000000; i++) {
          fmttable(table);
      }
 }
 ---END asciitable.d---

 Perl version:

 ---BEGIN asciitable.pl---


 sub fmttable {
      my $table = shift;

      my $res = "";


      my  widths;

      if ( $table == 0) { return "" }


          my $width = 0;

              if (length($table->[$rownum][$colnum]) > $width) {
                  $width = length($table->[$rownum][$colnum]);
              }
          }
          $widths[$colnum] = $width;
      }


          $res .= "|";

              $res .= sprintf("%-".$widths[$colnum]."s|",
 $table->[$rownum][$colnum]);
          }
          $res .= "\n";
      }
      $res;
 }


 #my $table = [["row1.1"]];


 my $table = [
      ["row1.1", "row1.2", "row1.3"],
      ["row2.1", "row2.2  ", "row2.3"],
      ["row3.1", "row3.2", "row3.3  "],
      ["row4.1", "row4.2", "row4.3"],
      ["row5.1", "row5.2", "row5.3"],
 ];

 print fmttable($table);

 for (1..1_000_000) {
      fmttable($table);
 }
 ---END asciitable.pl---

 While I am quite impressed with how easy I was able to write D, I am not
 so impressed with the performance. Using rdmd (build 20151103), the D
 program runs in 17.127s while the Perl version runs in 11.391s (so the D
 version is quite a bit *slower* than Perl's). While using gdc (Debian
 4.9.2-10), I am able to run it in 3.988s (only about 3x faster than
 Perl's version).

 I understand that string processing (concatenation, allocation) is quite
 optimized in Perl, I was wondering if the D version could still be sped
 up significantly?
I turned it into mostly using large allocations, instead of small ones.
Although I'd recommend using Appender instead of my custom functions for this.

Oh and for me, I got it at 2 secs, 513 ms, 397 μs, and 5 hnsecs. Unoptimized, using dmd.
When release mode is enabled on dmd: 1 sec, 550 ms, 838 μs, and 9 hnsecs. So significant improvement even with dmds awful optimizer.

import std.string;
import std.stdio;

// Source of padding characters; fmttable slices a prefix of it per cell.
// NOTE(review): this literal looks whitespace-collapsed in this archive; it
// has to be at least as long as the largest padding requested below — confirm
// against the original post.
static string SPACES = " ";

// Formats `table` as "|cell|cell|...|" lines, writing into one preallocated
// buffer instead of concatenating many small strings.
string fmttable(string[][] table) {
    // Slice of `buffer` holding the text written so far.
    char[] res;

    // column widths
    int[] widths;
    size_t totalSize;

    if (table.length == 0) return "";

    widths.length = table[0].length;

    // Find each column's widest cell and accumulate a size estimate for the
    // output buffer.
    foreach(colnum; 0 .. table[0].length) {
        int width = 0;
        size_t count;

        foreach(rownum; 0 .. table.length) {
            if (table[rownum][colnum].length > width)
                width = cast(int) table[rownum][colnum].length;
            count += table[rownum].length;
        }

        totalSize += ((width + 1) * count) + 2;
        widths[colnum] = width;
    }

    char[] buffer = new char[](totalSize);

    // Appends toAdd: lengthens buffer first when the remaining room is not
    // strictly greater than toAdd.length, then extends res over buffer and
    // copies toAdd into the new tail.
    void assignText(string toAdd) {
        if (res.length < buffer.length - toAdd.length) {
        } else {
            buffer.length += toAdd.length;
        }

        res = buffer[0 .. res.length + toAdd.length];
        res[$-toAdd.length .. $] = toAdd[];
    }

    foreach(rownum; 0 .. table.length) {
        assignText("|");
        foreach(colnum; 0 .. table[rownum].length) {
            // Padding is emitted before the cell text.
            assignText(SPACES[0 .. widths[colnum] - table[rownum][colnum].length]);
            assignText(table[rownum][colnum]);
            assignText("|");
        }
        assignText("\n");
    }

    return cast(string)res;
}

// Prints the table once, formats it a million more times, and prints the
// elapsed time measured by the StopWatch.
void main() {
    // tiny table (1x1)
    /*
    string[][] table = [
        ["row1.1"],
    ];
    */

    // small table (3x5)
    string[][] table = [
        ["row1.1", "row1.2 ", "row1.3"],
        ["row2.1", "row2.2", "row2.3"],
        ["row3.1", "row3.2", "row3.3 "],
        ["row4.1", "row4.2", "row4.3"],
        ["row5.1", "row5.2", "row5.3"],
    ];

    import std.datetime : StopWatch, TickDuration, Duration;

    StopWatch sw;
    TickDuration start = sw.peek();
    sw.start();

    write(fmttable(table));
    for (int i=0; i < 1000000; i++) {
        fmttable(table);
    }
    sw.stop();

    writeln(cast(Duration)(sw.peek() - start));
}
Nov 11 2015
next sibling parent Rikki Cattermole <alphaglosined gmail.com> writes:
On 12/11/15 3:20 AM, Rikki Cattermole wrote:
 On 12/11/15 2:31 AM, perlancar wrote:
 Here's my first non-hello-world D program, which is a direct translation
 from the Perl version. I was trying to get a feel about D's performance:

 ---BEGIN asciitable.d---
 import std.string;
 import std.stdio;

 string fmttable(ref string[][] table) {
      string res = "";

      // column widths
      int[] widths;

      if (table.length == 0) return "";

      widths.length = table[0].length;

      for (int colnum=0; colnum < table[0].length; colnum++) {
          int width = 0;
          for (int rownum=0; rownum < table.length; rownum++) {
              if (table[rownum][colnum].length > width)
                  width = cast(int) table[rownum][colnum].length;
          }
          widths[colnum] = width;
      }

      for (int rownum=0; rownum < table.length; rownum++) {
          res ~= "|";
          for (int colnum=0; colnum < table[rownum].length; colnum++) {
              res ~= leftJustify(table[rownum][colnum], widths[colnum]);
              res ~= "|";
          }
          res ~= "\n";
      }

      return res;
 }

 void main() {
      // tiny table (1x1)
      /*
      string[][] table = [
          ["row1.1"],
      ];
      */

      // small table (3x5)
      string[][] table = [
          ["row1.1", "row1.2  ", "row1.3"],
          ["row2.1", "row2.2", "row2.3"],
          ["row3.1", "row3.2", "row3.3  "],
          ["row4.1", "row4.2", "row4.3"],
          ["row5.1", "row5.2", "row5.3"],
      ];

      write(fmttable(table));
      for (int i=0; i < 1000000; i++) {
          fmttable(table);
      }
 }
 ---END asciitable.d---

 Perl version:

 ---BEGIN asciitable.pl---


 sub fmttable {
      my $table = shift;

      my $res = "";


      my  widths;

      if ( $table == 0) { return "" }


          my $width = 0;

              if (length($table->[$rownum][$colnum]) > $width) {
                  $width = length($table->[$rownum][$colnum]);
              }
          }
          $widths[$colnum] = $width;
      }


          $res .= "|";

              $res .= sprintf("%-".$widths[$colnum]."s|",
 $table->[$rownum][$colnum]);
          }
          $res .= "\n";
      }
      $res;
 }


 #my $table = [["row1.1"]];


 my $table = [
      ["row1.1", "row1.2", "row1.3"],
      ["row2.1", "row2.2  ", "row2.3"],
      ["row3.1", "row3.2", "row3.3  "],
      ["row4.1", "row4.2", "row4.3"],
      ["row5.1", "row5.2", "row5.3"],
 ];

 print fmttable($table);

 for (1..1_000_000) {
      fmttable($table);
 }
 ---END asciitable.pl---

 While I am quite impressed with how easy I was able to write D, I am not
 so impressed with the performance. Using rdmd (build 20151103), the D
 program runs in 17.127s while the Perl version runs in 11.391s (so the D
 version is quite a bit *slower* than Perl's). While using gdc (Debian
 4.9.2-10), I am able to run it in 3.988s (only about 3x faster than
 Perl's version).

 I understand that string processing (concatenation, allocation) is quite
 optimized in Perl, I was wondering if the D version could still be sped
 up significantly?
I turned it into mostly using large allocations, instead of small ones. Although I'd recommend using Appender instead of my custom functions for this. Oh and for me, I got it at 2 secs, 513 ms, 397 μs, and 5 hnsecs. Unoptimized, using dmd. When release mode is enabled on dmd: 1 sec, 550 ms, 838 μs, and 9 hnsecs. So significant improvement even with dmds awful optimizer. import std.string; import std.stdio; static string SPACES = " "; string fmttable(string[][] table) { char[] res; // column widths int[] widths; size_t totalSize; if (table.length == 0) return ""; widths.length = table[0].length; foreach(colnum; 0 .. table[0].length) { int width = 0; size_t count; foreach(rownum; 0 .. table.length) { if (table[rownum][colnum].length > width) width = cast(int) table[rownum][colnum].length; count += table[rownum].length; } totalSize += ((width + 1) * count) + 2; widths[colnum] = width; } char[] buffer = new char[](totalSize); void assignText(string toAdd) { if (res.length < buffer.length - toAdd.length) { } else { buffer.length += toAdd.length; } res = buffer[0 .. res.length + toAdd.length]; res[$-toAdd.length .. $] = toAdd[]; } foreach(rownum; 0 .. table.length) { assignText("|"); foreach(colnum; 0 .. table[rownum].length) { assignText(SPACES[0 .. widths[colnum] - table[rownum][colnum].length]); assignText(table[rownum][colnum]); assignText("|"); } assignText("\n"); } return cast(string)res; } void main() { // tiny table (1x1) /* string[][] table = [ ["row1.1"], ]; */ // small table (3x5) string[][] table = [ ["row1.1", "row1.2 ", "row1.3"], ["row2.1", "row2.2", "row2.3"], ["row3.1", "row3.2", "row3.3 "], ["row4.1", "row4.2", "row4.3"], ["row5.1", "row5.2", "row5.3"], ]; import std.datetime : StopWatch, TickDuration, Duration; StopWatch sw; TickDuration start = sw.peek(); sw.start(); write(fmttable(table)); for (int i=0; i < 1000000; i++) { fmttable(table); } sw.stop(); writeln(cast(Duration)(sw.peek() - start)); }
I didn't realize that leftJustify strips out whitespace. Just throw an assignment in the first foreach rownum loop that strips it out (hint: std.string : strip). Although I would be interested in seeing the performance of this in e.g. ldc and gdc.
Nov 11 2015
prev sibling parent reply perlancar <perlancar gmail.com> writes:
On Wednesday, 11 November 2015 at 14:20:51 UTC, Rikki Cattermole 
wrote:
 I turned it into mostly using large allocations, instead of 
 small ones.
 Although I'd recommend using Appender instead of my custom 
 functions for this.

 Oh and for me, I got it at 2 secs, 513 ms, 397 μs, and 5 
 hnsecs. Unoptimized, using dmd.
 When release mode is enabled on dmd: 1 sec, 550 ms, 838 μs, and 
 9 hnsecs. So significant improvement even with dmds awful 
 optimizer.
Hi Rikki, Thanks. With your version, I've managed to be ~4x faster: dmd : 0m1.588s dmd (release): 0m1.010s gdc : 0m2.093s ldc : 0m1.594s Perl version : 0m11.391s So, I'm satisfied enough with the speed for now. Turns out dmd is not always slower.
Nov 12 2015
parent reply Daniel Kozak via Digitalmars-d-learn <digitalmars-d-learn puremagic.com> writes:
V Thu, 12 Nov 2015 12:13:10 +0000
perlancar via Digitalmars-d-learn <digitalmars-d-learn puremagic.com>
napsáno:

 On Wednesday, 11 November 2015 at 14:20:51 UTC, Rikki Cattermole 
 wrote:
 I turned it into mostly using large allocations, instead of 
 small ones.
 Although I'd recommend using Appender instead of my custom 
 functions for this.

 Oh and for me, I got it at 2 secs, 513 ms, 397 μs, and 5 
 hnsecs. Unoptimized, using dmd.
 When release mode is enabled on dmd: 1 sec, 550 ms, 838 μs, and 
 9 hnsecs. So significant improvement even with dmds awful 
 optimizer.  
Hi Rikki, Thanks. With your version, I've managed to be ~4x faster: dmd : 0m1.588s dmd (release): 0m1.010s gdc : 0m2.093s ldc : 0m1.594s Perl version : 0m11.391s So, I'm satisfied enough with the speed for now. Turns out dmd is not always slower.
It depends on which flags you use with ldc and gdc:
ldc (-singleobj -release -O3 -boundscheck=off)
gdc (-O3 -finline -frelease -fno-bounds-check)
Nov 12 2015
parent reply Daniel Kozak <kozzi11 gmail.com> writes:
On Thursday, 12 November 2015 at 12:25:08 UTC, Daniel Kozak wrote:
 V Thu, 12 Nov 2015 12:13:10 +0000
 perlancar via Digitalmars-d-learn 
 <digitalmars-d-learn puremagic.com>
 napsáno:

 On Wednesday, 11 November 2015 at 14:20:51 UTC, Rikki 
 Cattermole wrote:
 I turned it into mostly using large allocations, instead of
 small ones.
 Although I'd recommend using Appender instead of my custom
 functions for this.

 Oh and for me, I got it at 2 secs, 513 ms, 397 μs, and 5
 hnsecs. Unoptimized, using dmd.
 When release mode is enabled on dmd: 1 sec, 550 ms, 838 μs, 
 and
 9 hnsecs. So significant improvement even with dmds awful
 optimizer.
Hi Rikki, Thanks. With your version, I've managed to be ~4x faster: dmd : 0m1.588s dmd (release): 0m1.010s gdc : 0m2.093s ldc : 0m1.594s Perl version : 0m11.391s So, I'm satisfied enough with the speed for now. Turns out dmd is not always slower.
It depends which flags do you use on ldc and gdc ldc (-singleobj -release -O3 -boundscheck=off) gdc (-O3 -finline -frelease -fno-bounds-check)
import std.stdio;

// Formats `table` as "|cell|cell|...|" lines, appending into a buffer whose
// size is computed up front.
auto fmttable(string[][] table) {
    import std.array : appender, uninitializedArray;
    import std.range : take, repeat;
    import std.exception : assumeUnique;

    if (table.length == 0) return "";

    // column widths
    auto widths = new int[](table[0].length);
    // Per row: one '|' per column plus the leading '|', plus the "\n".
    // Assumes every row has table[0].length cells.
    size_t total = (table[0].length + 1) * table.length + table.length;

    foreach (rownum, row; table) {
        foreach (colnum, cell; row) {
            if (cell.length > widths[colnum]) widths[colnum] = cast(int)cell.length;
        }
    }

    // Every row emits each column padded to that column's full width.
    foreach (colWidth; widths) {
        total += colWidth * table.length;
    }

    // Allocate the whole buffer, then clear() before appending —
    // presumably so the appender reuses that storage; confirm against the
    // std.array.Appender documentation.
    auto res = appender(uninitializedArray!(char[])(total));
    res.clear();

    foreach (row; table) {
        res ~= "|";
        foreach (colnum, cell; row) {
            // Number of padding spaces needed after the cell text.
            int l = widths[colnum] - cast(int)cell.length;
            res ~= cell;
            if (l) res ~= ' '.repeat().take(l);
            res ~= "|";
        }
        res.put("\n");
    }

    return res.data.assumeUnique();
}

// Prints the table once, then formats it a million more times.
void main() {
    auto table = [
        ["row1.1", "row1.2 ", "row1.3"],
        ["row2.1", "row2.2", "row2.3"],
        ["row3.1", "row3.2", "row3.3 "],
        ["row4.1", "row4.2", "row4.3"],
        ["row5.1", "row5.2", "row5.3"],
    ];

    writeln(fmttable(table));
    for (int i=0; i < 1000000; ++i) {
        fmttable(table);
    }
}

dmd -O -release -inline -boundscheck=off asciitable.d

real 0m1.463s
user 0m1.453s
sys 0m0.003s

ldc2 -singleobj -release -O3 -boundscheck=off asciitable.d

real 0m0.945s
user 0m0.940s
sys 0m0.000s

gdc -O3 -finline -frelease -fno-bounds-check -o asciitable asciitable.d

real 0m0.618s
user 0m0.613s
sys 0m0.000s

perl:

real 0m14.198s
user 0m14.170s
sys 0m0.000s
Nov 12 2015
next sibling parent Daniel Kozak <kozzi11 gmail.com> writes:
On Thursday, 12 November 2015 at 12:49:55 UTC, Daniel Kozak wrote:
 On Thursday, 12 November 2015 at 12:25:08 UTC, Daniel Kozak 
 wrote:
 ...	
     auto res = appender(uninitializedArray!(char[])(total));
     res.clear();
 ...
This is faster for DMD and ldc:

    auto res = appender!(string)();
    res.reserve(total);

but for gdc (frontend version 2.066) it makes it two times slower (same for dmd, ldc 2.066 and older).
Nov 12 2015
prev sibling parent perlancar <perlancar gmail.com> writes:
On Thursday, 12 November 2015 at 12:49:55 UTC, Daniel Kozak wrote:
 dmd -O -release -inline -boundscheck=off  asciitable.d

 real	0m1.463s
 user	0m1.453s
 sys	0m0.003s


 ldc2 -singleobj -release -O3 -boundscheck=off asciitable.d

 real	0m0.945s
 user	0m0.940s
 sys	0m0.000s

 gdc -O3 -finline -frelease -fno-bounds-check -o asciitable 
 asciitable.d

 real	0m0.618s
 user	0m0.613s
 sys	0m0.000s


 perl:

 real	0m14.198s
 user	0m14.170s
 sys	0m0.000s
Nice! Seems like I can get a further 100% improvement in speed from the last version (so a total of ~8x speedup from my original D version). Now I wonder how C would fare...
Nov 12 2015
prev sibling next sibling parent reply Andrea Fontana <nospam example.com> writes:
On Wednesday, 11 November 2015 at 13:32:00 UTC, perlancar wrote:
 While I am quite impressed with how easy I was able to write D, 
 I am not so impressed with the performance. Using rdmd (build 
 20151103), the D program runs in 17.127s while the Perl version 
 runs in 11.391s (so the D version is quite a bit *slower* than 
 Perl's). While using gdc (Debian 4.9.2-10), I am able to run it 
 in 3.988s (only about 3x faster than Perl's version).

 I understand that string processing (concatenation, allocation) 
 is quite optimized in Perl, I was wondering if the D version 
 could still be sped up significantly?
Did you try rdmd -O -noboundscheck -release yourscript.d ? You should try using appender!string rather than concatenation to improve performance. You should also switch from for to foreach. Andrea
Nov 11 2015
next sibling parent reply "H. S. Teoh via Digitalmars-d-learn" <digitalmars-d-learn puremagic.com> writes:
On Wed, Nov 11, 2015 at 02:26:28PM +0000, Andrea Fontana via
Digitalmars-d-learn wrote:
 On Wednesday, 11 November 2015 at 13:32:00 UTC, perlancar wrote:
While I am quite impressed with how easy I was able to write D, I am
not so impressed with the performance. Using rdmd (build 20151103),
the D program runs in 17.127s while the Perl version runs in 11.391s
(so the D version is quite a bit *slower* than Perl's). While using
gdc (Debian 4.9.2-10), I am able to run it in 3.988s (only about 3x
faster than Perl's version).

I understand that string processing (concatenation, allocation) is
quite optimized in Perl, I was wondering if the D version could still
be sped up significantly?
Did you try rdmd -O -noboundscheck -release yourscript.d ?
[...] If performance is a problem, my first reaction would be to try GDC or LDC. While there have been recent improvements in DMD code generation quality, it still has a ways to go to catch up with GDC/LDC's optimizer. T -- Старый друг лучше новых двух.
Nov 11 2015
parent cym13 <cpicard openmailbox.org> writes:
On Wednesday, 11 November 2015 at 16:02:07 UTC, H. S. Teoh wrote:
 If performance is a problem, my first reaction would be to try 
 GDC or LDC.  While there have been recent improvements in DMD 
 code generation quality, it still has a ways to go to catch 
 with GDC/LDC's optimizer.


 T
My computer seems to agree (note that I did those a bunch of time with intermediate rounds to heat the cache, these numbers are only to give a rough idea): $time rdmd --compiler=ldc test.d |row1.1|row1.2 |row1.3 | |row2.1|row2.2 |row2.3 | |row3.1|row3.2 |row3.3 | |row4.1|row4.2 |row4.3 | |row5.1|row5.2 |row5.3 | rdmd --compiler=ldc test.d 6.07s user 0.10s system 99% cpu 6.177 total $time rdmd --compiler=dmd test.d |row1.1|row1.2 |row1.3 | |row2.1|row2.2 |row2.3 | |row3.1|row3.2 |row3.3 | |row4.1|row4.2 |row4.3 | |row5.1|row5.2 |row5.3 | rdmd --compiler=dmd test.d 21.21s user 0.09s system 97% cpu 21.919 total $time ./test.pl |row1.1|row1.2 |row1.3 | |row2.1|row2.2 |row2.3 | |row3.1|row3.2 |row3.3 | |row4.1|row4.2 |row4.3 | |row5.1|row5.2 |row5.3 | ./test.pl 13.71s user 0.00s system 99% cpu 13.715 total With optimization on it is better but still not enough for dmd: $time rdmd --compiler=ldc -inline -release -O test.d |row1.1|row1.2 |row1.3 | |row2.1|row2.2 |row2.3 | |row3.1|row3.2 |row3.3 | |row4.1|row4.2 |row4.3 | |row5.1|row5.2 |row5.3 | rdmd --compiler=ldc -inline -release -O test.d 4.99s user 0.09s system 98% cpu 5.170 total $time rdmd --compiler=dmd -inline -release -O test.d |row1.1|row1.2 |row1.3 | |row2.1|row2.2 |row2.3 | |row3.1|row3.2 |row3.3 | |row4.1|row4.2 |row4.3 | |row5.1|row5.2 |row5.3 | rdmd --compiler=dmd -inline -release -O test.d 12.67s user 0.06s system 99% cpu 12.736 total
Nov 11 2015
prev sibling parent perlancar <perlancar gmail.com> writes:
On Wednesday, 11 November 2015 at 14:26:32 UTC, Andrea Fontana 
wrote:
 Did you try rdmd -O -noboundscheck -release yourscript.d ?
I just did. It improves speed from 17.127s to 14.831s. Nice, but nowhere near gdc/ldc level.
 You should try using appender!string rather than concatenate 

 capacity 

 improve performace.
 You should also switch from for to foreach.
Thanks for the above 2 tips.
Nov 12 2015
prev sibling parent reply Daniel Kozak <kozzi dlang.cz> writes:
On Wednesday, 11 November 2015 at 13:32:00 UTC, perlancar wrote:
 Here's my first non-hello-world D program, which is a direct 
 translation from the Perl version. I was trying to get a feel 
 about D's performance:

 ...

 While I am quite impressed with how easy I was able to write D, 
 I am not so impressed with the performance. Using rdmd (build 
 20151103), the D program runs in 17.127s while the Perl version 
 runs in 11.391s (so the D version is quite a bit *slower* than 
 Perl's). While using gdc (Debian 4.9.2-10), I am able to run it 
 in 3.988s (only about 3x faster than Perl's version).

 I understand that string processing (concatenation, allocation) 
 is quite optimized in Perl, I was wondering if the D version 
 could still be sped up significantly?
Main problem is with allocations and with stripLeft, here is my version which is 10x faster than perls even with DMD. With LDC is 12x faster

import std.stdio;
import std.array : appender;
import std.range;

// Formats `table` as "|cell|cell|...|" lines using a string appender.
// Generic over the table type T; the body uses .length, indexing and
// foreach over rows and cells.
auto fmttable(T)(T table) {
    auto res = appender!(string)();
    res.reserve(64);

    if (table.length == 0) return "";

    // column widths
    auto widths = new int[](table[0].length);

    foreach (rownum, row; table) {
        foreach (colnum, cell; row) {
            if (cell.length > widths[colnum]) widths[colnum] = cast(int)cell.length;
        }
    }

    foreach (row; table) {
        res.put("|");
        foreach (colnum, cell; row) {
            // Number of padding spaces needed after the cell text.
            int l = widths[colnum] - cast(int)cell.length;
            res.put(cell);
            if (l) res.put(' '.repeat().take(l));
            res.put("|");
        }
        res.put("\n");
    }

    return res.data;
}

// Prints the table once, then formats it a million more times.
void main() {
    auto table = [
        ["row1.1", "row1.2 ", "row1.3"],
        ["row2.1", "row2.2", "row2.3"],
        ["row3.1", "row3.2", "row3.3 "],
        ["row4.1", "row4.2", "row4.3"],
        ["row5.1", "row5.2", "row5.3"],
    ];

    write(fmttable(table));
    for (int i=0; i < 1000000; ++i) {
        fmttable(table);
    }
}
Nov 12 2015
parent reply Daniel Kozak via Digitalmars-d-learn <digitalmars-d-learn puremagic.com> writes:
V Thu, 12 Nov 2015 09:12:32 +0000
Daniel Kozak via Digitalmars-d-learn
<digitalmars-d-learn puremagic.com> napsáno:

 On Wednesday, 11 November 2015 at 13:32:00 UTC, perlancar wrote:
 Here's my first non-hello-world D program, which is a direct 
 translation from the Perl version. I was trying to get a feel 
 about D's performance:

 ...

 While I am quite impressed with how easy I was able to write D, 
 I am not so impressed with the performance. Using rdmd (build 
 20151103), the D program runs in 17.127s while the Perl version 
 runs in 11.391s (so the D version is quite a bit *slower* than 
 Perl's). While using gdc (Debian 4.9.2-10), I am able to run it 
 in 3.988s (only about 3x faster than Perl's version).

 I understand that string processing (concatenation, allocation) 
 is quite optimized in Perl, I was wondering if the D version 
 could still be sped up significantly?  
Main problem is with allocations and with stripLeft, here is my version which is 10x faster than perls even with DMD. With LDC is 12x faster import std.stdio; import std.array : appender; import std.range; auto fmttable(T)(T table) { auto res = appender!(string)(); res.reserve(64); if (table.length == 0) return ""; // column widths auto widths = new int[](table[0].length); foreach (rownum, row; table) { foreach (colnum, cell; row) { if (cell.length > widths[colnum]) widths[colnum] = cast(int)cell.length; } } foreach (row; table) { res.put("|"); foreach (colnum, cell; row) { int l = widths[colnum] - cast(int)cell.length; res.put(cell); if (l) res.put(' '.repeat().take(l)); res.put("|"); } res.put("\n"); } return res.data; } void main() { auto table = [ ["row1.1", "row1.2 ", "row1.3"], ["row2.1", "row2.2", "row2.3"], ["row3.1", "row3.2", "row3.3 "], ["row4.1", "row4.2", "row4.3"], ["row5.1", "row5.2", "row5.3"], ]; write(fmttable(table)); for (int i=0; i < 1000000; ++i) { fmttable(table); } }
or with ~ operator:

import std.stdio;

// Formats `table` as "|cell|cell|...|" lines, appending with ~= to an
// appender built over a 128-char uninitialized array.
auto fmttable(string[][] table) {
    import std.array : appender, uninitializedArray;
    import std.range : take, repeat;
    import std.exception : assumeUnique;

    // Allocate first, then clear() before appending — presumably so the
    // appender reuses that storage; confirm against the std.array.Appender
    // documentation.
    auto res = appender(uninitializedArray!(char[])(128));
    res.clear();

    if (table.length == 0) return "";

    // column widths
    auto widths = new int[](table[0].length);

    foreach (rownum, row; table) {
        foreach (colnum, cell; row) {
            if (cell.length > widths[colnum]) widths[colnum] = cast(int)cell.length;
        }
    }

    foreach (row; table) {
        res ~= "|";
        foreach (colnum, cell; row) {
            // Number of padding spaces needed after the cell text.
            int l = widths[colnum] - cast(int)cell.length;
            res ~= cell;
            if (l) res ~= ' '.repeat().take(l);
            res ~= "|";
        }
        res.put("\n");
    }

    return res.data.assumeUnique();
}

// Prints the table once, then formats it a million more times.
void main() {
    auto table = [
        ["row1.1", "row1.2 ", "row1.3"],
        ["row2.1", "row2.2", "row2.3"],
        ["row3.1", "row3.2", "row3.3 "],
        ["row4.1", "row4.2", "row4.3"],
        ["row5.1", "row5.2", "row5.3"],
    ];

    write(fmttable(table));
    for (int i=0; i < 1000000; ++i) {
        fmttable(table);
    }
}
Nov 12 2015
parent reply Tobias Pankrath <tobias pankrath.net> writes:
 or with ~ operator:

 import std.stdio;

 [...]
Did anyone check that the last loop isn't optimized out? Could also be improved further if you make the function take an output range and reuse one appender for every call, but that might be too far off the original Perl solution.
Nov 12 2015
parent Daniel Kozak via Digitalmars-d-learn <digitalmars-d-learn puremagic.com> writes:
V Thu, 12 Nov 2015 11:03:38 +0000
Tobias Pankrath via Digitalmars-d-learn
<digitalmars-d-learn puremagic.com> napsáno:

 or with ~ operator:

 import std.stdio;

 [...]  
Did anyone check that the last loop isn't optimized out?
Yes, it is not optimized out
 Could also be improved further if you make the function take an
 output range and reuse one appender for every call, but that might be
 to far off the original perl solution.
I agree, that would be too far off the original solution.
Nov 12 2015