www.digitalmars.com         C & C++   DMDScript  

digitalmars.D - std.conv with hex strings

reply "John C" <johnch_atms hotmail.com> writes:
The string to integer routines in std.conv don't convert strings with 
hexadecimal characters embedded. Neither do they take a radix base. I 
recently needed this functionality, and provide the source below, based on 
the existing std.conv code, for those who wish to use it. It should be easy 
to replace toUint, toLong, toUlong etc. If you spot any bugs, please let me 
know.

int toInt(char[] value, int fromBase = 10) {
    if (value.length == 0)
        goto L_ERR;

    uint startIndex;
    if (fromBase == 16 && value.length >= 2 && (value[0 .. 1] == "0x" || 
value[0 .. 1] == "0X"))
        startIndex = 2;

    int sign, n;

    for (uint i = startIndex; i < value.length; i++) {
        char ch = value[i];
        if (ch >= '0' && ch <= '9') {
            uint n1 = n;
            n = n * fromBase + (ch - '0');
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (ch >= 'A' && ch <= 'F') {
            uint n1 = n;
            n = n * fromBase + (ch - 'A' + 10);
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (ch >= 'a' && ch <= 'f') {
            uint n1 = n;
            n = n * fromBase + (ch - 'a' + 10);
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (fromBase == 10 && ch == '-' && i == 0) {
            sign = -1;
            if (value.length == 1) goto L_ERR;
        }
        else if (fromBase == 10 && ch == '+' && i == 0)  {
            if (value.length == 1) goto L_ERR;
        }
        else
            goto L_ERR;
    }
    if (sign == -1) {
        if (cast(uint)n > 0x80000000)
            goto L_OVERFLOW;
        n = -n;
    }
    else if (cast(uint)n > 0x7FFFFFFF)
        goto L_OVERFLOW;

    return n;

    L_OVERFLOW:
        throw new ConvOverflowError(value);

    L_ERR:
        throw new ConvError(value);

    return 0;
}
unittest {
    int i = toInt("0x89", 16);
    assert(i == 0x89);
} 
Oct 18 2005
parent reply BCS <BCS_member pathlink.com> writes:
1) no bounds checking in base 10:

#int main()
#{
#	printf("%d\n", toInt("ff", 10));
#	return 0;
#}

prints "165"

2) doesn't work for radix > 16

fix (maybe)
#else if (ch >= 'A' && (ch-'F'+10)<=fromBase) ...


3) doesn't work for radix < 10

fix (maybe)
#if (ch >= '0' && (ch-'9') <= fromBase) ...


In article <dj2no2$27jb$1 digitaldaemon.com>, John C says...
The string to integer routines in std.conv don't convert strings with 
hexadecimal characters embedded. Neither do they take a radix base. I 
recently needed this functionality, and provide the source below, based on 
the existing std.conv code, for those who wish to use it. It should be easy 
to replace toUint, toLong, toUlong etc. If you spot any bugs, please let me 
know.

int toInt(char[] value, int fromBase = 10) {
    if (value.length == 0)
        goto L_ERR;

    uint startIndex;
    if (fromBase == 16 && value.length >= 2 && (value[0 .. 1] == "0x" || 
value[0 .. 1] == "0X"))
        startIndex = 2;

    int sign, n;

    for (uint i = startIndex; i < value.length; i++) {
        char ch = value[i];
        if (ch >= '0' && ch <= '9') {
            uint n1 = n;
            n = n * fromBase + (ch - '0');
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (ch >= 'A' && ch <= 'F') {
            uint n1 = n;
            n = n * fromBase + (ch - 'A' + 10);
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (ch >= 'a' && ch <= 'f') {
            uint n1 = n;
            n = n * fromBase + (ch - 'a' + 10);
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (fromBase == 10 && ch == '-' && i == 0) {
            sign = -1;
            if (value.length == 1) goto L_ERR;
        }
        else if (fromBase == 10 && ch == '+' && i == 0)  {
            if (value.length == 1) goto L_ERR;
        }
        else
            goto L_ERR;
    }
    if (sign == -1) {
        if (cast(uint)n > 0x80000000)
            goto L_OVERFLOW;
        n = -n;
    }
    else if (cast(uint)n > 0x7FFFFFFF)
        goto L_OVERFLOW;

    return n;

    L_OVERFLOW:
        throw new ConvOverflowError(value);

    L_ERR:
        throw new ConvError(value);

    return 0;
}
unittest {
    int i = toInt("0x89", 16);
    assert(i == 0x89);
} 

Oct 18 2005
parent "John C" <johnch_atms hotmail.com> writes:
"BCS" <BCS_member pathlink.com> wrote in message 
news:dj35pf$2jba$1 digitaldaemon.com...
 1) no bounds checking in base 10:

 #int main()
 #{
 # printf("%d\n", toInt("ff", 10));
 # return 0;
 #}

 prints "165"

 2) doesn't work for radix > 16

 fix (maybe)
 #else if (ch >= 'A' && (ch-'F'+10)<=fromBase) ...


 3) doesn't work for radix < 10

 fix (maybe)
 #if (ch >= '0' && (ch-'9') <= fromBase) ...

You're quite right. I'm going to rework it to support radices of 2, 8, 10 and 16, which should be enough for most usages.
 In article <dj2no2$27jb$1 digitaldaemon.com>, John C says...
The string to integer routines in std.conv don't convert strings with
hexadecimal characters embedded. Neither do they take a radix base. I
recently needed this functionality, and provide the source below, based on
the existing std.conv code, for those who wish to use it. It should be 
easy
to replace toUint, toLong, toUlong etc. If you spot any bugs, please let 
me
know.

int toInt(char[] value, int fromBase = 10) {
    if (value.length == 0)
        goto L_ERR;

    uint startIndex;
    if (fromBase == 16 && value.length >= 2 && (value[0 .. 1] == "0x" ||
value[0 .. 1] == "0X"))
        startIndex = 2;

    int sign, n;

    for (uint i = startIndex; i < value.length; i++) {
        char ch = value[i];
        if (ch >= '0' && ch <= '9') {
            uint n1 = n;
            n = n * fromBase + (ch - '0');
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (ch >= 'A' && ch <= 'F') {
            uint n1 = n;
            n = n * fromBase + (ch - 'A' + 10);
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (ch >= 'a' && ch <= 'f') {
            uint n1 = n;
            n = n * fromBase + (ch - 'a' + 10);
            if (cast(uint)n < n1) goto L_OVERFLOW;
        }
        else if (fromBase == 10 && ch == '-' && i == 0) {
            sign = -1;
            if (value.length == 1) goto L_ERR;
        }
        else if (fromBase == 10 && ch == '+' && i == 0)  {
            if (value.length == 1) goto L_ERR;
        }
        else
            goto L_ERR;
    }
    if (sign == -1) {
        if (cast(uint)n > 0x80000000)
            goto L_OVERFLOW;
        n = -n;
    }
    else if (cast(uint)n > 0x7FFFFFFF)
        goto L_OVERFLOW;

    return n;

    L_OVERFLOW:
        throw new ConvOverflowError(value);

    L_ERR:
        throw new ConvError(value);

    return 0;
}
unittest {
    int i = toInt("0x89", 16);
    assert(i == 0x89);
}


Oct 19 2005