www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.learn - an example of parallel calculation of metrics

reply Jay Norwood <jayn prismnet.com> writes:
This is something I'm playing with for work. We do this a lot, 
capture counter events for some number of on-chip performance 
counters, compute some metrics, display the outputs. This seems 
ideal for the application.

import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// Define the input measurement sample tuple and the output metric tuple.
// TI holds the five raw values captured for one sample.
alias TI = Tuple!(long, long, long, long, long);
// TO holds four long results for one sample; main() uses TO[] as the
// output buffer of taskPool.amap!(Metrics).
alias TO = Tuple!(long, long, long, long);

// Various metric definitions.
// The Tuples could also define names for each member and use the names
// here in the metrics.

// met1: sum of sample fields 0, 1 and 2.
long met1( TI m){ return m[0] + m[1] + m[2]; }
// met2: sum of sample fields 1, 2 and 3.
long met2( TI m){ return m[1] + m[2] + m[3]; }
// met3: field 0 minus field 1 plus field 2.
long met3( TI m){ return m[0] - m[1] + m[2]; }
// met4: field 0 plus field 1 minus field 2.
long met4( TI m){ return m[0] + m[1] - m[2]; }

// A convenience to use all the metrics above as a list.
alias Metrics = AliasSeq!(met1,met2,met3,met4);

// Fills 1000 synthetic measurement samples, evaluates every metric in
// `Metrics` for every sample on the task pool, then prints the results and
// the elapsed time in milliseconds.
void main(string[] argv)
{
	auto samples = iota(1_000);
	auto meas = new TI[samples.length];
	auto results = new TO[samples.length];

	// Initialize some values for the measured samples
	foreach(i, ref m; meas){
		m[0] = i;
		m[1] = i+1;
		m[2] = i+2;
		m[3] = i+3;
		m[4] = i+4;
	}

	// Only the parallel evaluation below is timed; the setup above is not.
	std.datetime.StopWatch sw;
	sw.start();

	// Maps a sample index to the corresponding measurement, by reference.
	ref TI getTerm(int i)
	{
		return meas[i];
	}

	// compute the metric results for the above measured sample values in parallel
	taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

	// how long did this take
	long exec_ms = sw.peek().msecs;
	writeln("results:", results);
	writeln("time:", exec_ms);

}
Sep 30 2015
parent reply Jay Norwood <jayn prismnet.com> writes:
On Wednesday, 30 September 2015 at 22:24:25 UTC, Jay Norwood 
wrote:
 // various metric definitions
 // the Tuples could also define names for each member and use 
 the names here in the metrics.
 long met1( TI m){ return m[0] + m[1] + m[2]; }
 long met2( TI m){ return m[1] + m[2] + m[3]; }
 long met3( TI m){ return m[0] - m[1] + m[2]; }
 long met4( TI m){ return m[0] + m[1] - m[2]; }
These should use reference parameters:

long met1( ref TI m){ return m[0] + m[1] + m[2]; }
long met2( ref TI m){ return m[1] + m[2] + m[3]; }
long met3( ref TI m){ return m[0] - m[1] + m[2]; }
long met4( ref TI m){ return m[0] + m[1] - m[2]; }
Sep 30 2015
parent reply Jay Norwood <jayn prismnet.com> writes:
This compiles and appears to execute correctly, but if I 
uncomment the taskPool line I get a compile error message about 
wrong buffer type.  Am I breaking some rule for 
std.parallelism.amap?

import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// Define the input measurement sample tuple and the output metric tuples.

// TR: one computed metric -- the raw event count together with its
// per-cycle and per-second rates.
alias TR = Tuple!(double, "per_sec", double, "per_cycle", long, "raw");

// TI: one measurement sample of named counter values; proc_cyc is the
// divisor used for every per-cycle rate below.
alias TI = Tuple!(long, "proc_cyc", long, "DATA_RD", long, "DATA_WR",
                  long, "INST_FETCH", long, "L1I_MISS", long, "L1I_HIT",
                  long, "L1D_HIT", long, "L1D_MISS");

// TO: the four metric results for one sample.
alias TO = Tuple!(TR, "L1_MISS", TR, "L1_HIT", TR, "DATA_ACC", TR, "ALL_ACC");

// Scale factor from a per-cycle rate to a per-second rate.
const double CYC_PER_SEC = 1_600_000_000;

// Builds a TR from a raw event count and the cycle count of the sample.
// A zero cycle count is not special-cased: the floating-point division then
// yields infinity or NaN rather than raising an error.
private TR rateOf(long raw, long cycles)
{
	TR rv;
	rv.raw = raw;
	rv.per_cycle = cast(double)raw/cycles;
	rv.per_sec = rv.per_cycle*CYC_PER_SEC;
	return rv;
}

// Various metric definitions.
// Using Tuples with defined names for each member, and use the names here
// in the metrics.

// L1 misses: instruction-side plus data-side miss counts.
TR met_l1_miss ( ref TI m){ return rateOf(m.L1I_MISS + m.L1D_MISS, m.proc_cyc); }

// L1 hits: instruction-side plus data-side hit counts.
TR met_l1_hit ( ref TI m){ return rateOf(m.L1I_HIT + m.L1D_HIT, m.proc_cyc); }

// Data accesses: reads plus writes.
TR met_data_acc ( ref TI m){ return rateOf(m.DATA_RD + m.DATA_WR, m.proc_cyc); }

// All accesses: data reads, data writes and instruction fetches.
TR met_all_acc( ref TI m){ return rateOf(m.DATA_RD + m.DATA_WR + m.INST_FETCH, m.proc_cyc); }

// A convenience to use all the metrics above as a list.
alias Metrics =
	AliasSeq!(met_l1_miss,met_l1_hit,met_data_acc,met_all_acc);

// Fills 100 synthetic measurement samples, evaluates the four metrics
// serially for the first sample, and prints them with the elapsed time.
// The parallel amap call is left disabled: enabling it fails to compile
// with a "Wrong buffer type." error.
void main(string[] argv)
{
	auto samples = iota(1_00);
	auto meas = new TI[samples.length];
	auto results = new TO[samples.length];

	// Initialize some values for the measured samples
	foreach(i, ref m; meas){
		with(m){
			proc_cyc = 1_000_000+i*2;
			DATA_RD = 1000+i;
			DATA_WR= 2000+i;
			INST_FETCH=proc_cyc/2;
			L1I_HIT= INST_FETCH-100; L1I_MISS=100;
			L1D_HIT= DATA_RD+DATA_WR - 200; L1D_MISS=200;
		}
	}

	std.datetime.StopWatch sw;
	sw.start();

	// Maps a sample index to the corresponding measurement, by reference.
	// Only referenced by the disabled amap call below.
	ref TI getTerm(int i)
	{
		return meas[i];
	}

	// compute the metric results for the above measured sample values in parallel
	//taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

	TR rv1 = met_l1_miss( meas[0]);
	TR rv2 = met_l1_hit( meas[0]);
	TR rv3 = met_data_acc( meas[0]);
	TR rv4 = met_all_acc( meas[0]);

	// how long did this take
	long exec_ms = sw.peek().msecs;
	writeln("measurements:", meas[0]);
	writeln("rv1:", rv1);
	writeln("rv2:", rv2);
	writeln("rv3:", rv3);
	writeln("rv4:", rv4);
	// With amap disabled, `results` still holds default-initialized values.
	writeln("results:", results[1]);
	writeln("time:", exec_ms);

}
Sep 30 2015
parent reply Ali Çehreli <acehreli yahoo.com> writes:
On 09/30/2015 09:15 PM, Jay Norwood wrote:

 alias TO = Tuple!(TR,"L1_MISS", TR, "L1_HIT", TR,"DATA_ACC", TR,"ALL_ACC");

Looks like a bug. Workaround: Get rid of member names there:

alias TO = Tuple!(TR, TR, TR, TR);

 //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

For some reason, having member names prevents 'results' from passing one of amap's requirements. The following check in std.parallelism thinks that 'results' does not have random assignable elements if it is a Tuple with member names:

else static if(randAssignable!(Args[$ - 1]) && Args.length > 1)
{
    static assert(0, "Wrong buffer type.");
}

Ali
Oct 01 2015
parent reply Jay Norwood <jayn prismnet.com> writes:
On Thursday, 1 October 2015 at 07:03:40 UTC, Ali Çehreli wrote:
 Looks like a bug. Workaround: Get rid of member names
Thanks. My particular use case, working with metric expressions, is easier to understand if I use the names. I converted the use of Tuple to struct to see if I could get an easier error msg. Turns out the use of struct also results in much cleaner writeln text.

Still has the compile error, though.

import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// define some input measurement sample tuples and output metric tuples
struct TR {double per_sec; double per_cycle; long raw;}
struct TI {long proc_cyc; long DATA_RD; long DATA_WR; long INST_FETCH; long L1I_MISS; long L1I_HIT; long L1D_HIT; long L1D_MISS;}
struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC;}
const double CYC_PER_SEC = 1_600_000_000;

// various metric definitions
// using Tuples with defined names for each member, and use the names here in the metrics.
TR met_l1_miss ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_MISS+L1D_MISS; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; }
TR met_l1_hit ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_HIT+L1D_HIT; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; }
TR met_data_acc ( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; }
TR met_all_acc( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR+INST_FETCH; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; }

// a convenience to use all the metrics above as a list
alias Metrics = AliasSeq!(met_l1_miss,met_l1_hit,met_data_acc,met_all_acc);

void main(string[] argv)
{
	auto samples = iota(1_00);
	auto meas = new TI[samples.length];
	auto results = new TO[samples.length];

	// Initialize some values for the measured samples
	foreach(i, ref m; meas){
		with(m){ proc_cyc = 1_000_000+i*2; DATA_RD = 1000+i; DATA_WR= 2000+i; INST_FETCH=proc_cyc/2;
			L1I_HIT= INST_FETCH-100; L1I_MISS=100;
			L1D_HIT= DATA_RD+DATA_WR - 200; L1D_MISS=200;}
	}

	std.datetime.StopWatch sw;
	sw.start();

	ref TI getTerm(int i)
	{
		return meas[i];
	}

	// compute the metric results for the above measured sample values in parallel
	taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

	TR rv1 = met_l1_miss( meas[0]);
	TR rv2 = met_l1_hit( meas[0]);
	TR rv3 = met_data_acc( meas[0]);
	TR rv4 = met_all_acc( meas[0]);

	// how long did this take
	long exec_ms = sw.peek().msecs;
	writeln("measurements:", meas[0]);
	writeln("rv1:", rv1);
	writeln("rv2:", rv2);
	writeln("rv3:", rv3);
	writeln("rv4:", rv4);
	writeln("results:", results[1]);
	writeln("time:", exec_ms);
}
Oct 01 2015
parent reply Ali Çehreli <acehreli yahoo.com> writes:
On 10/01/2015 08:56 AM, Jay Norwood wrote:

 Thanks.  My particular use case, working with metric expressions, is
 easier to understand if I use the names.
Makes sense. Please open a bug at least for investigation why tuples with named members don't work with amap.
 I converted the use of Tuple
 to struct to see if I could get an easier error msg. Turns out the use
 of struct also results in much cleaner writeln text.

 Still has the compile error, though.
We have to live with the fact that amap and friends produce a Tuple result if there are multiple functions. A struct won't work. However, if you prove to yourself that the result tuple and your struct have the same memory layout, you can cast the tuple slice to struct slice after calling amap:

alias TO_for_amap_result = Tuple!(TR, TR, TR, TR);
struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC;}

// ...

	auto results_for_amap = new TO_for_amap_result[samples.length];

// ...

	taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results_for_amap);
	auto results = cast(TO[])results_for_amap;

	// Use 'results' from this point on...

Ali
Oct 01 2015
next sibling parent Jay Norwood <jayn prismnet.com> writes:
On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote:
 Makes sense. Please open a bug at least for investigation why 
 tuples with named members don't work with amap.
ok, thanks. I opened the issue. https://issues.dlang.org/show_bug.cgi?id=15134
Oct 01 2015
prev sibling parent reply Jay Norwood <jayn prismnet.com> writes:
On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote:
 However, if you prove to yourself that the result tuple and 
 your struct have the same memory layout, you can cast the tuple 
 slice to struct slice after calling amap:
After re-reading your explanation, I see that the problem is only that the results need to be a Tuple. It works with named tuple members in this example as the result and array of struct as the input. I'll re-check if the multi-member result also works with named members. I'll update the issue report.

import std.meta;
import std.stdio;

// define some input measurement sample tuples and output metric tuples
struct TI {long L1I_MISS; long L1D_MISS; }
alias TO = Tuple!(long, "raw");

// various metric definitions
// using Tuples with defined names for each member, and use the names here in the metrics.
TO met_l1_miss ( ref TI m){ TO rv; rv.raw = m.L1I_MISS+m.L1D_MISS; return rv; }

// a convenience to use all the metrics above as a list
alias Metrics = AliasSeq!(met_l1_miss);

void main(string[] argv)
{
	auto samples = iota(100);
	auto meas = new TI[samples.length];
	auto results = new TO[samples.length];

	// Initialize some values for the measured samples
	foreach(i, ref m; meas){
		m.L1D_MISS= 100+i; m.L1I_MISS=100-i;
	}

	ref TI getTerm(int i)
	{
		return meas[i];
	}

	// compute the metric results for the above measured sample values in parallel
	taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

	TO rv1 = met_l1_miss( meas[1]);

	writeln("measurements:", meas[1]);
	writeln("rv1:", rv1);
	writeln("results:", results[1]);
}
Oct 01 2015
parent reply Jay Norwood <jayn prismnet.com> writes:
So, this is a condensed version of the original problem. It looks 
like the problem is that the return value for taskPool.amap can't 
be a tuple of tuples or a tuple of struct.  Either way, it fails 
with the Wrong buffer type error message if I uncomment the 
taskPool line

import std.algorithm, std.parallelism, std.range;
import std.typecons;
import std.meta;
import std.stdio;

// Define the input measurement sample tuple and the output metric tuples.

// TR: one computed metric -- the raw event count and its per-cycle rate.
struct TR { long raw; double per_cyc;}
//alias TR = Tuple!(long, "raw", double, "per_cyc");

// TI: one measurement sample of named counter values; `cycles` is the
// divisor used for the per-cycle rates below.
alias TI = Tuple!(long, "L1I_MISS", long, "L1D_MISS", long, "L1D_READ",
                  long, "L1D_WRITE", long, "cycles");

// TO: the two metric results for one sample.
alias TO = Tuple!(TR, "L1_MISS", TR, "L1D_ACCESS");

// Various metric definitions.
// Using Tuples with defined names for each member, and use the names here
// in the metrics.

// L1 misses: instruction-side plus data-side miss counts, and that sum
// divided by the sample's cycle count.
TR met_l1_miss ( ref TI m)
{
	TR rv;
	rv.raw = m.L1I_MISS+m.L1D_MISS;
	rv.per_cyc = cast(double)rv.raw/m.cycles;
	return rv;
}

// L1 data accesses: reads plus writes, and that sum divided by the
// sample's cycle count.
TR met_l1_access ( ref TI m)
{
	TR rv;
	rv.raw = m.L1D_READ+m.L1D_WRITE;
	rv.per_cyc = cast(double)rv.raw/m.cycles;
	return rv;
}

// A convenience to use all the metrics above as a list.
alias Metrics = AliasSeq!(met_l1_miss, met_l1_access);

// Fills 100 synthetic measurement samples, evaluates both metrics serially
// for sample 1 and prints them. The parallel amap call is left disabled:
// enabling it fails to compile with a "Wrong buffer type." error.
void main(string[] argv)
{
	auto samples = iota(100);
	auto meas = new TI[samples.length];
	auto results = new TO[samples.length];

	// Initialize some values for the measured samples
	foreach(i, ref m; meas){
		m.L1D_MISS= 100+i; m.L1I_MISS=100-i;
		m.L1D_READ= 200+i; m.L1D_WRITE=200-i;
		m.cycles= 10+i;
	}

	// Maps a sample index to the corresponding measurement, by reference.
	// Only referenced by the disabled amap call below.
	ref TI getTerm(int i)
	{
		return meas[i];
	}

	// compute the metric results for the above measured sample values in parallel
	//taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

	TR rv1 = met_l1_miss( meas[1]);
	TR rv2 = met_l1_access( meas[1]);

	writeln("measurements:", meas[1]);
	writeln("rv1:", rv1);
	writeln("rv2:", rv2);
	// With amap disabled, `results` still holds default-initialized values.
	writeln("results:", results[1]);

}
Oct 01 2015
parent reply Jay Norwood <jayn prismnet.com> writes:
I re-submitted this as:
https://issues.dlang.org/show_bug.cgi?id=15135
Oct 01 2015
parent Jay Norwood <jayn prismnet.com> writes:
This is another attempt with the metric parallel processing. This 
uses the results only to return an int value, which could be used 
later as an error return value.  The metric value locations are 
now allocated as a part of the input measurement values tuple.

The Tuple vs struct definitions seem to have a big difference in 
default output formatting.


import std.algorithm, std.parallelism, std.range;
import std.typecons;
import std.meta;
import std.stdio;

// Define the input measurement sample tuple and the output metric tuples.

// TR: one computed metric -- the raw event count and its per-cycle rate.
alias TR = Tuple!(long,"raw",double, "per_cycle");
//struct TR {long raw; double per_cycle;}

// TO: the metric results for one sample.
alias TO = Tuple!(TR, "l1_miss", TR, "l1_access" );
//struct TO {TR l1_miss; TR l1_access; };

// TI: one measurement sample of named counter values. The trailing `res`
// member is where met_all stores the computed metrics for that sample.
alias TI = Tuple!(long, "L1I_MISS", long, "L1D_MISS", long, "L1D_READ",
                  long, "L1D_WRITE", long, "cycles", TO, "res");

// Various metric definitions.
// Using Tuples with defined names for each member, and use the names here
// in the metrics.

// Raw L1 miss count: instruction-side plus data-side misses.
long met_l1_miss ( ref TI m){  return m.L1I_MISS + m.L1D_MISS; }

// Raw L1 data access count: reads plus writes.
long met_l1_access ( ref TI m){  return  m.L1D_READ + m.L1D_WRITE; }

// Computes every metric for one sample and stores the results in m.res.
// Per-cycle rates are set to NaN when the sample's cycle count is zero.
// Always returns 0.
int met_all (ref TI m) {

	// Evaluate the zero-cycle guard once; both rates share it.
	immutable bool noCycles = (m.cycles == 0);
	immutable double cycles = cast(double)m.cycles;

	with (m.res){
		l1_miss.raw = met_l1_miss(m);
		l1_access.raw = met_l1_access(m);
		l1_miss.per_cycle = noCycles ? double.nan : l1_miss.raw / cycles;
		l1_access.per_cycle = noCycles ? double.nan : l1_access.raw / cycles;
	}
	return 0;
}

// A convenience to use all the metrics above as a list.
alias Metrics = AliasSeq!(met_all);

// Fills 100 synthetic measurement samples, runs met_all over them on the
// task pool, then prints sample 1 and every sample's computed metrics.
// met_all writes its output into each sample's `res` member; `results`
// only receives the int that met_all returns.
void main(string[] argv)
{
	auto samples = iota(100);
	auto meas = new TI[samples.length];
	auto results = new int[samples.length];

	// Initialize some values for the measured samples
	foreach(i, ref m; meas){
		m.L1D_MISS= 100+i; m.L1I_MISS=100-i;
		m.L1D_READ= 200+i; m.L1D_WRITE=200-i;
		m.cycles= 10+i;
	}

	// Maps a sample index to the corresponding measurement, by reference,
	// so that met_all updates the element stored in `meas`.
	ref TI getTerm(int i)
	{
		return meas[i];
	}

	// compute the metric results for the above measured sample values in parallel
	taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);


	writeln("measurements:", meas[1]);
	foreach(ref m; meas){
		writeln(m.res);
	}

}
Oct 01 2015