www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.learn - How do I get names of regex captures during iteration? Populate AAs

Is there a way to get the name of a named capture when iterating 
over the captures from a regular expression match?  I've looked at 
the std.regex code and, to my eyes, the answer seems to be "no", 
but I wonder whether others here have found a way.

My original problem is this: I need to populate an associative 
array (AA) with all named captures that successfully matched 
during a regex match (and none of the captures that failed).  I 
was wondering what the best way to do this might be.

Thanks!

Please see the comments in the program below for details and my 
current progress:

/// Demo driver: prints the compiler name/version and the pattern under test,
/// then reports the captures for one input that has a value and one that
/// does not.
void main()
{
    import std.compiler;
    import std.regex;
    import std.range;
    import std.stdio;

    // `var` is mandatory; the whole `value` group is optional (trailing `?`).
    enum assignmentPattern = `(?P<var>\w+)\s*=\s*(?P<value>\d+)?;`;

    writefln("Compiler name:    %s", std.compiler.name);
    writefln("Compiler version: %s.%s", version_major, version_minor);
    writeln("");

    writefln("Regular expression: `%s`", assignmentPattern);
    writeln("");

    auto compiled = regex(assignmentPattern);

    // First input fills both named groups; second leaves `value` unmatched.
    foreach (input; ["a = 42;", "a = ;"])
        reportCaptures(compiled, matchFirst(input, compiled));
}

/**
 * Prints the state of every named capture group of `re` as found in
 * `captures`, and collects the groups that hold a non-empty value into an
 * associative array keyed by group name, which is printed at the end.
 *
 * Params:
 *   re       = compiled regex; only its `namedCaptures` list is read here
 *   captures = match result to inspect (in this program, from `matchFirst`)
 */
void reportCaptures(Regex, RegexCaptures)(Regex re, RegexCaptures captures)
{
    import std.range;
    import std.regex;
    import std.stdio;

    writefln("Captures from matched string '%s'", captures[0]);

    string[string] captureList;

    // I am trying to read the captures from a regular expression match
    // into the above AA.
    //
    // ...
    //
    // This kind of works, but requires a string lookup for each capture
    // and using it in practice relies on undocumented behavior regarding
    // the return value of std.regex.Capture's opIndex[string] method
    // when the string index is a valid named capture that was not actually
    // captured during the match (ex: the named capture was qualified with
    // the ? operator or the * operator in the regex and never appeared in
    // the matched string).
    foreach (captureName; re.namedCaptures)
    {
        auto capture = captures[captureName];
        if (capture is null)
            writefln("  captures[%s] is null", captureName);
        else if (capture.empty)
            writefln("  captures[%s] is empty", captureName);
        else
        {
            // Only groups with actual text are recorded in the AA.
            writefln("  captures[%s] is '%s'", captureName, capture);
            captureList[captureName] = capture;
        }
    }

    writefln("Total captures: %s", captureList);

    /+
    // I really want to do something like this, instead:
    foreach( capture; captures )
        captureList[capture.name] = capture.value;

    // And, in reality, it might need to be more like this:
    foreach( capture; captures )
        foreach ( valueIndex, value; capture.values )
            captureList[format("%s-%s",capture.name,valueIndex)] = value;
    // Because, logically, named captures qualified with the
    // *, +, or {} operators in regular expressions may capture
    // multiple slices.

    writefln("Total captures: %s", captureList);
    +/

    writeln("");
}


//Output, DMD64 D Compiler v2.073.1:
//---
//
//Compiler name:    Digital Mars D
//Compiler version: 2.73
//
//Regular expression: `(?P<var>\w+)\s*=\s*(?P<value>\d+)?;`
//
//Captures from matched string 'a = 42;'
//  captures[value] is '42'
//  captures[var] is 'a'
//Total captures: ["value":"42", "var":"a"]
//
//Captures from matched string 'a = ;'
//  captures[value] is empty
//  captures[var] is 'a'
//Total captures: ["var":"a"]
Feb 28