www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.learn - Natural Sort optimzation

Spent the last hour trying to get a natural sort. Ended up having 
to create my own(not sure if it's 100% correct though but works 
in practice). The Rosetta one is not correct.

Here is my implementation, anyone care to perfect it?

// Compares the strings a and b numerically, e.g., "04038284" and 
"000002"  returns 1, the first is larger.
int compareStringNum(string a, string b)
{
	// Remove leading 0's
	a = a.stripLeft('0');
	b = b.stripLeft('0');
	if (a.length > b.length) return 1;
	if (a.length < b.length) return -1;
	auto m = a.length;
	for(int i = 0; i < m; i++)
	{
		auto qqx = a[i];
		auto qqy = b[i];
		// Assume ascii comparision holds, i.e., '0' < '1' < ... < '9'
		if (a[i] > b[i]) return 1;
		if (a[i] < b[i]) return -1;
	}

	return 0;
}

string[] naturalSort(string[] arr) /*pure  safe*/
{
	alias myComp = (x, y) {
		auto ml = min(x.length, y.length)-1;
		auto ofsx = -1; auto ofsy = -1;
		auto numx_found = -1;
		auto numy_found = -1;
		for(int i = 0; i < ml; i++)
		{
			ofsx++;
			ofsy++;

			// If we have not found any numbers(that are not the same) and 
we have a difference, the strings must be different, fall back on 
standard character comparer
			if (!isDigit(x[ofsx]) && !isDigit(y[ofsy]) && x[ofsx] != 
y[ofsy] && numx_found < 0 && numy_found < 0)
				return x[ofsx] > y[ofsy];

			// We are at a position in the string where either we are at 
the end of a digit sequence or the characters have been identical 
up to this point
			if (isDigit(x[ofsx]) && numx_found < 0) numx_found = ofsx;
			if (isDigit(y[ofsy]) && numy_found < 0) numy_found = ofsy;
		
			if (!isDigit(x[ofsx]) && !isDigit(y[ofsy]) && numx_found >= 0 
&& numy_found >= 0)
			{
				auto numx = x[numx_found..ofsx];
				auto numy = y[numy_found..ofsy];
				auto res = compareStringNum(numx, numy);		
				if (res != 0) return (res != 1);
			}

			if (!isDigit(x[ofsx]) && !isDigit(y[ofsy]))
			{
				numx_found = -1;
				numy_found = -1;
			} else
			{
				if (!isDigit(x[ofsx]) && numx_found >= 0) ofsx--;
				if (!isDigit(y[ofsy]) && numy_found >= 0) ofsy--;
			}

			
		}

		return x > y;
	};

	auto x = arr.sort!(myComp).release;
	return x;
}
Jun 30 2017