www.digitalmars.com         C & C++   DMDScript  

D - problems with ?/{0,1} in RegExp

reply "Mike Wynn" <mike.wynn l8night.co.uk> writes:
I'm having grief from the RegExp libs.
I'm trying to match -- item [optional item] item
e.g. "(a)(b)?(c)"; the ()'s are there so I can use replace( "$1" ); later

This does match "acc", but I then cannot use $2 or $3 in a replace
statement.

also I want to do ..
"(a)((b|c)?(d|e)*)(f)" and use 'a' and 'f' (which might be, say, '\S+\s*\{' and
'}\s*\S+')
ideally with the following replace semantics (see end of message for the
current o/p)
$1 -> (a)
$2 -> ((b|c)?(d|e)*)
$3 -> (f)
even if $2 == ""
and either another regexp has to be run over $2 or
$21 -> (b|c)?
$22 -> (d|e)*

I'm not 100% used to regexps (I've always avoided them), so I am a bit unsure
what the regexp experts would be expecting; I would be happy if I could use
$1 -> (a)
$2 -> ((b|c)?(d|e)*)
$3 -> (b|c)?
$4 -> (d|e)*
$5 -> (f)

the code I've been trying is this

import regexp;
import string;

// Tests form against inp. When test() reports a match (non-zero), asks form
// for the "$1", "$2" and "$3" replacements and returns them in one report
// string; otherwise returns "NO TRANSFORM " followed by inp.
char[] convInner( RegExp form, char[] inp )
{
 // Guard: a zero result from test() takes the no-match path.
 if ( form.test( inp ) == 0 )
  return "NO TRANSFORM "~inp;

 // Placeholders: each is overwritten by its replace() call below, and is
 // only seen in the report if that call is commented out.
 char[] first = "1";
 char[] second = "2";
 char[] third = "3";

 first = form.replace( "$1" );
// Comment out the next two lines to keep this from throwing an exception.
 second = form.replace( "$2" );
 third = form.replace( "$3" );

 char[] report = "Transfromed '"~first~"' + '"~second~"' + '"~third~"';";
 return report;
}

// Variant 1: optional group written as (b){0,1}, with no extra wrapping group.
// Runs convInner with that pattern and prints "conv1) <input> -> <result>".
void myConvert1( char[] inp )
{
 char[] rv;
 rv = convInner( new RegExp( '^(a)(b){0,1}(c*)', null ), inp );
 // D char[] values are length-delimited and not guaranteed to be
 // 0-terminated (rv is built by concatenation), so give printf an explicit
 // length via %.*s instead of letting %s search for a terminator.
 printf( "conv1) %.*s -> %.*s\n", (int)inp.length, (char *)inp, (int)rv.length, (char *)rv );
}

// Variant 2: the (b){0,1} group is wrapped in an additional outer group.
// Runs convInner with that pattern and prints "conv2) <input> -> <result>".
void myConvert2( char[] inp )
{
 char[] rv;
 rv = convInner( new RegExp( '^(a)((b){0,1})(c*)', null ), inp );
 // D char[] values are length-delimited and not guaranteed to be
 // 0-terminated (rv is built by concatenation), so give printf an explicit
 // length via %.*s instead of letting %s search for a terminator.
 printf( "conv2) %.*s -> %.*s\n", (int)inp.length, (char *)inp, (int)rv.length, (char *)rv );
}

// Variant 3: optional group written as (b)?, with no extra wrapping group.
// Runs convInner with that pattern and prints "conv3) <input> -> <result>".
void myConvert3( char[] inp )
{
 char[] rv;
 rv = convInner( new RegExp( '^(a)(b)?(c*)', null ), inp );
 // D char[] values are length-delimited and not guaranteed to be
 // 0-terminated (rv is built by concatenation), so give printf an explicit
 // length via %.*s instead of letting %s search for a terminator.
 printf( "conv3) %.*s -> %.*s\n", (int)inp.length, (char *)inp, (int)rv.length, (char *)rv );
}

// Variant 4: the (b)? group is wrapped in an additional outer group.
// Runs convInner with that pattern and prints "conv4) <input> -> <result>".
void myConvert4( char[] inp )
{
 char[] rv;
 rv = convInner( new RegExp( '^(a)((b)?)(c*)', null ), inp );
 // D char[] values are length-delimited and not guaranteed to be
 // 0-terminated (rv is built by concatenation), so give printf an explicit
 // length via %.*s instead of letting %s search for a terminator.
 printf( "conv4) %.*s -> %.*s\n", (int)inp.length, (char *)inp, (int)rv.length, (char *)rv );
}

// Feeds one input string to each of the four pattern variants, in order.
void runAllConverts( char[] inp )
{
 myConvert1( inp );
 myConvert2( inp );
 myConvert3( inp );
 myConvert4( inp );
}

// Entry point: runs every variant first on an input that contains the
// optional 'b', then on one that omits it. Returns 0 on normal completion.
int main( char[][] args )
{
 char[] withB = "abcc";
 char[] withoutB = "acc";

 runAllConverts( withB );
 runAllConverts( withoutB );
 return 0;
}

which outputs
conv1) abcc -> Transfromed 'a' + 'b' + 'cc';
conv2) abcc -> Transfromed 'a' + 'b' + 'b';
conv3) abcc -> Transfromed 'a' + 'b' + 'cc';
conv4) abcc -> Transfromed 'a' + 'b' + 'b';
Error: ArrayBoundsError regexp(2396)
Oct 25 2002
parent reply "Walter" <walter digitalmars.com> writes:
I'll take a look. -Walter

"Mike Wynn" <mike.wynn l8night.co.uk> wrote in message
news:apc16h$c6r$1 digitaldaemon.com...
 I'm having grief from the RegExp libs,
 I'm tryng to match -- item [optional item] item
 eg "(a)(b)?(c)"  ()'s used so I can use replace( "$1" ); later

 but this will match to "acc" but I can not use $2 or $3 in a replace
 statement.

 also I want to do ..
 "(a)((b|c)?(d|e)*)(f) and use 'a' and 'f' (which might be say '\S+\s*\{' and
 '}\s*\S+')
 ideally with the following replace symantics (see end of message for the
 current o/p)
 $1 -> (a)
 $2 -> ((b|c)?(d|e)*)
 $3 -> (f)
 even if $2 == ""
 and either another regexp has to be run over $2 or
 $21 -> (b|c)?
 $22 -> (d|e)*

 I'm not 100% used to regexps I've always avoided them so am a bit unsure
 what the regexp experts would be expecting; I would be happy if I could use
 $1 -> (a)
 $2 -> ((b|c)?(d|e)*)
 $3 -> (b|c)?
 $4 -> (d|e)*
 $5 -> (f)

 the code I've been trying is this

 import regexp;
 import string;

 char[] convInner( RegExp form, char[] inp )
 {
  if ( form.test( inp ) != 0 )
  {
   char[] one = "1";
   char[] two = "2";
   char[] three = "3";
   one = form.replace( "$1" );
 // comment out theses lines to get this to not throw an exception
   two = form.replace( "$2" );
   three = form.replace( "$3" );
   return "Transfromed '"~one~"' + '"~two~"' + '"~three~"';";
  }
  return "NO TRANSFORM "~inp;
 }

 void myConvert1( char[] inp )
 {
  char[] rv;
  rv = convInner( new RegExp( '^(a)(b){0,1}(c*)', null ), inp );
  printf( "conv1) %s -> %s\n", (char *)inp, (char *)rv );
 }

 void myConvert2( char[] inp )
 {
  char[] rv;
  rv = convInner( new RegExp( '^(a)((b){0,1})(c*)', null ), inp );
  printf( "conv2) %s -> %s\n", (char *)inp, (char *)rv );
 }

 void myConvert3( char[] inp )
 {
  char[] rv;
  rv = convInner( new RegExp( '^(a)(b)?(c*)', null ), inp );
  printf( "conv3) %s -> %s\n", (char *)inp, (char *)rv );
 }

 void myConvert4( char[] inp )
 {
  char[] rv;
  rv = convInner( new RegExp( '^(a)((b)?)(c*)', null ), inp );
  printf( "conv4) %s -> %s\n", (char *)inp, (char *)rv );
 }

 int main( char[][] args )
 {
  char[] str1 = "abcc";
  char[] str2 = "acc";

  myConvert1( str1 );
  myConvert2( str1 );
  myConvert3( str1 );
  myConvert4( str1 );

  myConvert1( str2 );
  myConvert2( str2 );
  myConvert3( str2 );
  myConvert4( str2 );
  return 0;
 }

 which outputs
 conv1) abcc -> Transfromed 'a' + 'b' + 'cc';
 conv2) abcc -> Transfromed 'a' + 'b' + 'b';
 conv3) abcc -> Transfromed 'a' + 'b' + 'cc';
 conv4) abcc -> Transfromed 'a' + 'b' + 'b';
 Error: ArrayBoundsError regexp(2396)

Oct 25 2002
parent reply "Mike Wynn" <mike.wynn l8night.co.uk> writes:
As an aside, I had a look around the Perl site for info on regexps (it's one
thing Perl's very good at)
and found http://dev.perl.org/rfc/360.html
regexps are slow (in comparison to arithmetic ops) so having Objects rather
than char[] returned would not represent a major performance hit.
( semi c++ pseudo code to show ownership)
i.e.

RegExpElement [] Regexp::Eval( char[] str ); // runs the regexp over the
string, can reuse the Regexp object
char[] RegExpElement::toString(); // get the "value" of the () element
RegExpElement RegExpElement::next(); // get the next if the group had a
postfix of *,+,? or {}
RegExpElement[] RegExpElement::children(); // get the child groups from the
regexp

e.g
RegExp rex = new RegExp( "(a)*(b+)(c+(d+)(e)*)f" );

then
rex.Eval( "aabbbcddef" );
would return
[
    "a" :next-> "a" :next-> null
    "bbb"
    "cdde" <children> [
        "dd"
        "e" :next-> null;
    ]
]

then
rex.Eval( "bbcdf" );
would return
[
    MT :next-> null
    "bb"
    "cd" <children> [
        "d"
        MT :next-> null;
    ]
]

I'm not sure whether MT (an empty match) should be "", null, or a special 'empty' Element.


"Walter" <walter digitalmars.com> wrote in message
news:apcrb4$17d1$1 digitaldaemon.com...
 I'll take a look. -Walter

 "Mike Wynn" <mike.wynn l8night.co.uk> wrote in message
 news:apc16h$c6r$1 digitaldaemon.com...
 I'm having grief from the RegExp libs,
 I'm tryng to match -- item [optional item] item
 eg "(a)(b)?(c)"  ()'s used so I can use replace( "$1" ); later

 but this will match to "acc" but I can not use $2 or $3 in a replace
 statement.

 also I want to do ..
 "(a)((b|c)?(d|e)*)(f) and use 'a' and 'f' (which might be say '\S+\s*\{' and
 '}\s*\S+')
 ideally with the following replace symantics (see end of message for the
 current o/p)
 $1 -> (a)
 $2 -> ((b|c)?(d|e)*)
 $3 -> (f)
 even if $2 == ""
 and either another regexp has to be run over $2 or
 $21 -> (b|c)?
 $22 -> (d|e)*

 I'm not 100% used to regexps I've always avoided them so am a bit unsure
 what the regexp experts would be expecting; I would be happy if I could use
 $1 -> (a)
 $2 -> ((b|c)?(d|e)*)
 $3 -> (b|c)?
 $4 -> (d|e)*
 $5 -> (f)

 the code I've been trying is this

 import regexp;
 import string;

 char[] convInner( RegExp form, char[] inp )
 {
  if ( form.test( inp ) != 0 )
  {
   char[] one = "1";
   char[] two = "2";
   char[] three = "3";
   one = form.replace( "$1" );
 // comment out theses lines to get this to not throw an exception
   two = form.replace( "$2" );
   three = form.replace( "$3" );
   return "Transfromed '"~one~"' + '"~two~"' + '"~three~"';";
  }
  return "NO TRANSFORM "~inp;
 }

 void myConvert1( char[] inp )
 {
  char[] rv;
  rv = convInner( new RegExp( '^(a)(b){0,1}(c*)', null ), inp );
  printf( "conv1) %s -> %s\n", (char *)inp, (char *)rv );
 }

 void myConvert2( char[] inp )
 {
  char[] rv;
  rv = convInner( new RegExp( '^(a)((b){0,1})(c*)', null ), inp );
  printf( "conv2) %s -> %s\n", (char *)inp, (char *)rv );
 }

 void myConvert3( char[] inp )
 {
  char[] rv;
  rv = convInner( new RegExp( '^(a)(b)?(c*)', null ), inp );
  printf( "conv3) %s -> %s\n", (char *)inp, (char *)rv );
 }

 void myConvert4( char[] inp )
 {
  char[] rv;
  rv = convInner( new RegExp( '^(a)((b)?)(c*)', null ), inp );
  printf( "conv4) %s -> %s\n", (char *)inp, (char *)rv );
 }

 int main( char[][] args )
 {
  char[] str1 = "abcc";
  char[] str2 = "acc";

  myConvert1( str1 );
  myConvert2( str1 );
  myConvert3( str1 );
  myConvert4( str1 );

  myConvert1( str2 );
  myConvert2( str2 );
  myConvert3( str2 );
  myConvert4( str2 );
  return 0;
 }

 which outputs
 conv1) abcc -> Transfromed 'a' + 'b' + 'cc';
 conv2) abcc -> Transfromed 'a' + 'b' + 'b';
 conv3) abcc -> Transfromed 'a' + 'b' + 'cc';
 conv4) abcc -> Transfromed 'a' + 'b' + 'b';
 Error: ArrayBoundsError regexp(2396)


Oct 26 2002
parent "Walter" <walter digitalmars.com> writes:
"Mike Wynn" <mike.wynn l8night.co.uk> wrote in message
news:apdtc6$2no8$1 digitaldaemon.com...
 as an aside, I had a look around the Perl site for info on Regexp's (its one
 thing Perl's very good at)
 and found http://dev.perl.org/rfc/360.html

D's regexps are equivalent to the JavaScript functionality, but I agree that Perl has gone way beyond that!
Oct 26 2002