www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.ldc - D-specific optimisation: opCmp

opCmp is a pretty roundabout way of dealing with comparisons from 
a computational point of view and optimisers seem quite bad at 
dealing with it. For example, based on 
https://github.com/D-Programming-Language/phobos/pull/3927 :

% cat comparisons.d
/// Three-way comparison of two floats, encoded as a float:
/// -1 when a < b, +1 when a > b, 0 when a == b, and float.nan when none
/// of those three tests holds (the operands are unordered).
float opCmp(float a, float b)
{
     // Tests are tried in order: less-than, greater-than, equal; NaN is the fallback.
     return a < b ? -1 : a > b ? +1 : a == b ? 0 : float.nan;
}

/// "a > b" expressed through opCmp: the boolean result of opCmp(a, b) > 0
/// is returned as an int (1 or 0).
int gt(float a, float b) {
     return opCmp(a, b) > 0;
}

/// "a >= b" expressed through opCmp: the boolean result of opCmp(a, b) >= 0
/// is returned as an int (1 or 0).
int gte(float a, float b) {
     return opCmp(a, b) >= 0;
}

/// "a < b" expressed through opCmp: the boolean result of opCmp(a, b) < 0
/// is returned as an int (1 or 0).
int lt(float a, float b) {
     return opCmp(a, b) < 0;
}

/// "a <= b" expressed through opCmp: the boolean result of opCmp(a, b) <= 0
/// is returned as an int (1 or 0).
int lte(float a, float b) {
     return opCmp(a, b) <= 0;
}

/// Baseline for gt: uses the built-in float comparison a > b directly,
/// returning the boolean result as an int (1 or 0).
int gt_direct(float a, float b) {
     return a > b;
}

/// Baseline for gte: uses the built-in float comparison a >= b directly,
/// returning the boolean result as an int (1 or 0).
int gte_direct(float a, float b) {
     return a >= b;
}

/// Baseline for lt: uses the built-in float comparison a < b directly,
/// returning the boolean result as an int (1 or 0).
int lt_direct(float a, float b) {
     return a < b;
}

/// Baseline for lte: uses the built-in float comparison a <= b directly,
/// returning the boolean result as an int (1 or 0).
int lte_direct(float a, float b) {
     return a <= b;
}
% ldmd2 -O -inline -release -output-s comparisons.d
% cat comparisons.s
	# Literal pool: the three 32-bit float constants that
	# __D11comparisons5opCmpFffZf loads RIP-relative.
	.section	__TEXT,__text,regular,pure_instructions
	.section	__TEXT,__literal4,4byte_literals
	.align	2
	# 3212836864 = 0xBF800000, the IEEE-754 single-precision bit pattern of -1.0
LCPI0_0:
	.long	3212836864
	# 1065353216 = 0x3F800000, the IEEE-754 single-precision bit pattern of +1.0
LCPI0_1:
	.long	1065353216
	# 2143289344 = 0x7FC00000, a single-precision NaN bit pattern
LCPI0_2:
	.long	2143289344
	# Switch back to the code section for the functions that follow.
	.section	__TEXT,__text,regular,pure_instructions
	# opCmp(float a, float b): three-way comparison, float result left in %xmm0.
	# NOTE(review): the flag tests below match the D source only if a is in
	# %xmm1 and b is in %xmm0 (arguments in reverse order) -- confirm against
	# the D ABI in use.
	.globl	__D11comparisons5opCmpFffZf
	.align	4, 0x90
__D11comparisons5opCmpFffZf:
	.cfi_startproc
	# Compare %xmm0 against %xmm1; jbe is taken unless %xmm0 > %xmm1
	# (it is also taken when the operands are unordered).
	ucomiss	%xmm1, %xmm0
	jbe	LBB0_2
	# %xmm0 > %xmm1: return the constant at LCPI0_0 (-1.0).
	movss	LCPI0_0(%rip), %xmm1
	movaps	%xmm1, %xmm0
	retq
LBB0_2:
	# Same test with the operands swapped: fall through only when %xmm1 > %xmm0.
	ucomiss	%xmm0, %xmm1
	jbe	LBB0_5
	# %xmm1 > %xmm0: return the constant at LCPI0_1 (+1.0).
	movss	LCPI0_1(%rip), %xmm1
	movaps	%xmm1, %xmm0
	retq
LBB0_5:
	# Branch-free tail: cmpeqss turns the low lane of %xmm1 into an all-ones
	# mask when the operands are equal and all-zeros otherwise.
	cmpeqss	%xmm0, %xmm1
	movss	LCPI0_2(%rip), %xmm0
	# %xmm1 = ~mask & NaN: 0.0 when equal, the NaN constant otherwise.
	andnps	%xmm0, %xmm1
	movaps	%xmm1, %xmm0
	retq
	.cfi_endproc

	# gt(float a, float b): opCmp(a, b) > 0 with opCmp inlined; int result in %eax.
	# Two compares and two conditional branches remain.
	# NOTE(review): reads as a in %xmm1, b in %xmm0 -- confirm against the D ABI.
	.globl	__D11comparisons2gtFffZi
	.align	4, 0x90
__D11comparisons2gtFffZi:
	.cfi_startproc
	# Preload result 0, then return it at once if %xmm0 > %xmm1.
	xorl	%eax, %eax
	ucomiss	%xmm1, %xmm0
	ja	LBB1_3
	# Preload result 1, then return it if %xmm1 > %xmm0.
	movl	$1, %eax
	ucomiss	%xmm0, %xmm1
	ja	LBB1_3
	# Neither strict ordering held (equal or unordered): result is 0.
	xorl	%eax, %eax
LBB1_3:
	retq
	.cfi_endproc

	# gte(float a, float b): opCmp(a, b) >= 0 with opCmp inlined; int result in %eax.
	# NOTE(review): reads as a in %xmm1, b in %xmm0 -- confirm against the D ABI.
	.globl	__D11comparisons3gteFffZi
	.align	4, 0x90
__D11comparisons3gteFffZi:
	.cfi_startproc
	# jbe is taken unless %xmm0 > %xmm1 (also taken when unordered).
	ucomiss	%xmm1, %xmm0
	jbe	LBB2_2
	# %xmm0 > %xmm1: return 0. The movzbl is redundant after the xorl,
	# which has already cleared all of %eax.
	xorl	%eax, %eax
	movzbl	%al, %eax
	retq
LBB2_2:
	# Second compare with swapped operands: setae yields 1 when %xmm1 >= %xmm0
	# and 0 otherwise, including the unordered case (which sets CF).
	ucomiss	%xmm0, %xmm1
	setae	%al
	movzbl	%al, %eax
	retq
	.cfi_endproc

	# lt(float a, float b): opCmp(a, b) < 0; int result in %eax.
	# This is reduced to a single compare-and-set, the same instruction
	# sequence as __D11comparisons9lt_directFffZi.
	.globl	__D11comparisons2ltFffZi
	.align	4, 0x90
__D11comparisons2ltFffZi:
	.cfi_startproc
	# %al = 1 when %xmm0 > %xmm1, else 0 (0 when unordered); widened to %eax.
	ucomiss	%xmm1, %xmm0
	seta	%al
	movzbl	%al, %eax
	retq
	.cfi_endproc

	# lte(float a, float b): opCmp(a, b) <= 0 with opCmp inlined; int result in %eax.
	# NOTE(review): reads as a in %xmm1, b in %xmm0 -- confirm against the D ABI.
	.globl	__D11comparisons3lteFffZi
	.align	4, 0x90
__D11comparisons3lteFffZi:
	.cfi_startproc
	# Preload result 1 and return it if %xmm0 > %xmm1.
	movb	$1, %al
	ucomiss	%xmm1, %xmm0
	ja	LBB4_2
	# Otherwise the result is the equality test: cmpeqss writes an all-ones
	# mask (equal) or zero (not equal / unordered) to the low lane of %xmm1;
	# the mask is moved to %eax and reduced to its low bit.
	cmpeqss	%xmm0, %xmm1
	movd	%xmm1, %eax
	andl	$1, %eax
LBB4_2:
	# Both paths join here; zero-extend the byte result to the int return value.
	movzbl	%al, %eax
	retq
	.cfi_endproc

	# gt_direct(float a, float b): a > b as one compare-and-set; int result in %eax.
	# NOTE(review): reads as a in %xmm1, b in %xmm0 -- confirm against the D ABI.
	.globl	__D11comparisons9gt_directFffZi
	.align	4, 0x90
__D11comparisons9gt_directFffZi:
	.cfi_startproc
	# %al = 1 when %xmm1 > %xmm0, else 0 (0 when unordered); widened to %eax.
	ucomiss	%xmm0, %xmm1
	seta	%al
	movzbl	%al, %eax
	retq
	.cfi_endproc

	# gte_direct(float a, float b): a >= b as one compare-and-set; int result in %eax.
	# NOTE(review): reads as a in %xmm1, b in %xmm0 -- confirm against the D ABI.
	.globl	__D11comparisons10gte_directFffZi
	.align	4, 0x90
__D11comparisons10gte_directFffZi:
	.cfi_startproc
	# %al = 1 when %xmm1 >= %xmm0, else 0 (0 when unordered); widened to %eax.
	ucomiss	%xmm0, %xmm1
	setae	%al
	movzbl	%al, %eax
	retq
	.cfi_endproc

	# lt_direct(float a, float b): a < b as one compare-and-set; int result in %eax.
	# NOTE(review): reads as a in %xmm1, b in %xmm0 -- confirm against the D ABI.
	.globl	__D11comparisons9lt_directFffZi
	.align	4, 0x90
__D11comparisons9lt_directFffZi:
	.cfi_startproc
	# %al = 1 when %xmm0 > %xmm1, else 0 (0 when unordered); widened to %eax.
	ucomiss	%xmm1, %xmm0
	seta	%al
	movzbl	%al, %eax
	retq
	.cfi_endproc

	# lte_direct(float a, float b): a <= b as one compare-and-set; int result in %eax.
	# NOTE(review): reads as a in %xmm1, b in %xmm0 -- confirm against the D ABI.
	.globl	__D11comparisons10lte_directFffZi
	.align	4, 0x90
__D11comparisons10lte_directFffZi:
	.cfi_startproc
	# %al = 1 when %xmm0 >= %xmm1, else 0 (0 when unordered); widened to %eax.
	ucomiss	%xmm1, %xmm0
	setae	%al
	movzbl	%al, %eax
	retq
	.cfi_endproc


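See how much better the code-gen is for the direct 
implementations? Only lt is reduced to the same single 
compare-and-set sequence as its direct counterpart; gt, gte and 
lte each keep an extra comparison and at least one branch. It 
would be great if LDC was somehow able to get this right.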
Jan 13