www.digitalmars.com         C & C++   DMDScript  

digitalmars.D - A brainfuck parser works now with newCTFE

reply Stefan Koch <uplink.coder googlemail.com> writes:
Hi Guys,

I just ran the first moderately complex ctfe function 
successfully through newCTFE!

This is the code that works now :

module bf_parser;

enum BFTokenEnum
{
     IncPtr = '>',
     DecPtr = '<',
     IncVal = '+',
     DecVal = '-',
     OutputVal = '.',
     InputVal = ',',
     LoopBegin = '[',
     LoopEnd = ']',

     ProgrammBegin,
     ProgrammEnd,
}

struct RepeatedToken
{
     uint _token;
     alias _token this;

      property BFTokenEnum token() const pure
     {
         return cast(BFTokenEnum)(_token >> 24);
     }

      property uint count() const pure
     {
         return _token & 0x00_FF_FF_FF;
     }
}

RepeatedToken Token(BFTokenEnum token) pure
{
     return cast(RepeatedToken)(token << 24 | 1);
}

const(RepeatedToken[]) parseBf(const string input) pure
{
     uint pos;
     RepeatedToken[] result;
     result.length = input.length + 2;
     // the maximal number of diffrent tokens is equal to the 
chars in the input
     // plus the begin and end token

     result[0] = Token(BFTokenEnum.ProgrammBegin);
     uint resultLen = 0;

     while (pos < input.length)
     {
         final switch (input[pos++]) with (BFTokenEnum)
         {
         case '>':
             if ((result[resultLen] & 0xFF_00_00_00) >> 24 == 
IncPtr)
             {
                 result[resultLen]++;
             }
             else
             {
                 result[++resultLen] = Token(IncPtr);
             }
             break;
         case '<':
             if ((result[resultLen] & 0xFF_00_00_00) >> 24 == 
DecPtr)
             {
                 result[resultLen]++;
             }
             else
             {
                 result[++resultLen] = Token(DecPtr);
             }
             break;
         case '+':
             if ((result[resultLen] & 0xFF_00_00_00) >> 24 == 
IncVal)
             {
                 result[resultLen]++;
             }
             else
             {
                 result[++resultLen] = Token(IncVal);
             }
             break;
         case '-':
             if ((result[resultLen] & 0xFF_00_00_00) >> 24 == 
DecVal)
             {
                 result[resultLen]++;
             }
             else
             {
                 result[++resultLen] = Token(DecVal);
             }
             break;
         case '.':
             if ((result[resultLen] & 0xFF_00_00_00) >> 24 == 
OutputVal)
             {
                 result[resultLen]++;
             }
             else
             {
                 result[++resultLen] = Token(OutputVal);
             }
             break;
         case ',':
             if ((result[resultLen] & 0xFF_00_00_00) >> 24 == 
InputVal)
             {
                 result[resultLen]++;
             }
             else
             {
                 result[++resultLen] = Token(InputVal);
             }
             break;
         case '[':
             if ((result[resultLen] & 0xFF_00_00_00) >> 24 == 
LoopBegin)
             {
                 result[resultLen]++;
             }
             else
             {
                 result[++resultLen] = Token(LoopBegin);
             }
             break;
         case ']':
             if ((result[resultLen] & 0xFF_00_00_00) >> 24 == 
LoopEnd)
             {
                 result[resultLen]++;
             }
             else
             {
                 result[++resultLen] = Token(LoopEnd);
             }
             break;
         case '\r':
             pos++;
             goto case '\n';
         case '\n':
             //TODO handle lines and proper position information;
             break;
         }
     }

     result[++resultLen] = Token(BFTokenEnum.ProgrammEnd);
     return result[0 .. resultLen + 1];
}

pragma(msg, parseBf("[,....]")[3].token);

It has been a year of work to get to this point.
And it might seem a little trivial.

But this is actually the a showcase of methods, slices, strings 
and structs interacting.
Jul 29
next sibling parent reply "H. S. Teoh via Digitalmars-d" <digitalmars-d puremagic.com> writes:
On Sun, Jul 30, 2017 at 01:15:50AM +0000, Stefan Koch via Digitalmars-d wrote:
 Hi Guys,
 
 I just ran the first moderately complex ctfe function successfully through
 newCTFE!
 
 This is the code that works now :
 
 module bf_parser;
[...]
 pragma(msg, parseBf("[,....]")[3].token);
 
 It has been a year of work to get to this point.
 And it might seem a little trivial.
 
 But this is actually the a showcase of methods, slices, strings and
 structs interacting.
Very nice indeed! Out of curiosity, how does the performance of newCTFE compare to the current CTFE with the above code? E.g., if you pass in a BF program of non-trivial length, what's the difference in performance? T -- Tech-savvy: euphemism for nerdy.
Jul 29
parent Stefan Koch <uplink.coder googlemail.com> writes:
On Sunday, 30 July 2017 at 01:20:52 UTC, H. S. Teoh wrote:
 On Sun, Jul 30, 2017 at 01:15:50AM +0000, Stefan Koch via 
 Digitalmars-d wrote:
 Hi Guys,
 
 I just ran the first moderately complex ctfe function 
 successfully through newCTFE!
 
 This is the code that works now :
 
 module bf_parser;
[...]
 pragma(msg, parseBf("[,....]")[3].token);
 
 It has been a year of work to get to this point.
 And it might seem a little trivial.
 
 But this is actually the a showcase of methods, slices, 
 strings and structs interacting.
Very nice indeed! Out of curiosity, how does the performance of newCTFE compare to the current CTFE with the above code? E.g., if you pass in a BF program of non-trivial length, what's the difference in performance? T
newCTFE currently is about 3.65x faster. (for a medium sized bf-program of 6467 bytes which is parsed into 3441 RepeatedTokens);
Jul 29
prev sibling parent Stefan Koch <uplink.coder googlemail.com> writes:
On Sunday, 30 July 2017 at 01:15:50 UTC, Stefan Koch wrote:
 [ ... ]
Shortly after posting this I came up with a more effective variant of the parseBf which reduces the generate bytecode from around 650 instructions to around 430 instructions. const(RepeatedToken[]) parseBf(const string input) pure { uint pos; RepeatedToken[] result; result.length = input.length + 2; // the maximal number of diffrent tokens is equal to the chars in the input // plus the begin and end token result[0] = Token(BFTokenEnum.ProgrammBegin); uint resultLen = 0; while (pos < input.length) { uint lastToken = (result[resultLen] >> 24); uint thisToken = BFTokenEnum.ProgrammEnd; final switch (input[pos++]) with (BFTokenEnum) { case '>': thisToken = IncPtr; break; case '<': thisToken = DecPtr; break; case '+': thisToken = IncVal; break; case '-': thisToken = DecVal; break; case '.': thisToken = OutputVal; break; case ',': thisToken = InputVal; break; case '[': thisToken = LoopBegin; break; case ']': thisToken = LoopEnd; break; case '\r': pos++; goto case '\n'; case '\n': //TODO handle lines and proper position information; break; } if (lastToken == thisToken) { result[resultLen]++; } else if (thisToken != BFTokenEnum.ProgrammEnd) { result[++resultLen] = Token(cast(BFTokenEnum)thisToken); } } result[++resultLen] = Token(BFTokenEnum.ProgrammEnd); return result[0 .. resultLen + 1]; } In theory it would be possible to get rid of the switch altogether ;) And compress the function ever more. But readability would suffer and we'd rely on the enum-members to correspond to the tokens, which maybe undesirable.
Jul 29