digitalmars.D - Re: Few ideas to reduce template bloat
- bearophile <bearophileHUGS lycos.com> Mar 26 2010
I have asked to llvm devs, it seems llvm already has an optimization pass that removes duplicated functions. But it's disabled on default, you can use it with LDC with "-mergefunc". Its implementation, probably written by nlewycky: https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_25/lib/Transforms/IPO/MergeFunctions.cpp -------------------- I have done few tests in D1 with LDC, the first one, here -mergefunc (and no other optimizations) leaves a single function: import tango.stdc.stdio: printf; import tango.stdc.stdlib: atoi; int factorial1(int X) { if (X == 0) return 1; return X*factorial1(X-1); } int factorial2(int X) { if (X == 0) return 1; return X*factorial2(X-1); } void main(char[][] args) { printf("%d\n", factorial1(atoi(args[1].ptr))); printf("%d\n", factorial2(atoi(args[1].ptr))); } -------------------- Second test I've done: import tango.stdc.stdio: printf; import tango.stdc.stdlib: atoi; T factorial(T)(T x) { if (x == 0) return 1; else return x * factorial(x - 1); } void main(char[][] args) { printf("%d\n", factorial(atoi(args[1].ptr))); printf("%d\n", factorial(cast(uint)atoi(args[1].ptr))); } The resulting asm with -mergefunc shows some duplication left (a person says this is a bug that can be fixed, I have to talk with another developer): __unnamed_1: subl $4, %esp movl %eax, (%esp) testl %eax, %eax jne .LBB2_2 movl $1, %eax addl $4, %esp ret .LBB2_2: movl (%esp), %eax decl %eax call factorial!uint imull (%esp), %eax addl $4, %esp ret factorial!int: subl $4, %esp call __unnamed_1 addl $4, %esp ret factorial!uint: subl $4, %esp call __unnamed_1 addl $4, %esp ret _Dmain: subl $28, %esp movl 36(%esp), %eax movl %eax, 20(%esp) movl 32(%esp), %eax movl %eax, 16(%esp) cmpl $1, 16(%esp) jbe .LBB1_3 movl 20(%esp), %eax movl 12(%eax), %eax movl %eax, (%esp) call atoi call factorial!int movl %eax, 4(%esp) movl $.str, (%esp) call printf cmpl $1, 16(%esp) jbe .LBB1_4 movl 20(%esp), %eax movl 12(%eax), %eax movl %eax, (%esp) call atoi call factorial!uint movl %eax, 4(%esp) movl $.str2, (%esp) call printf xorl %eax, %eax addl $28, %esp ret $8 .LBB1_3: movl .modulefilename+4, %eax movl %eax, 4(%esp) movl .modulefilename, %eax movl %eax, (%esp) movl $12, 8(%esp) call _d_array_bounds .LBB1_4: movl .modulefilename+4, %eax movl %eax, 4(%esp) movl .modulefilename, %eax movl %eax, (%esp) movl $13, 8(%esp) call _d_array_bounds ---------------------------- My third experiment: import tango.stdc.stdio: printf; import tango.stdc.stdlib: atoi; int factorial1(int X) { if (X == 0) return 1; return X*factorial1(X-1); } int factorial2(int X) { if (X == 0) return 1; return X*factorial2(X-1); } int caller(int function(int) func, int x) { return func(x); } void main() { printf("%d\n", caller(&factorial1, atoi("10"))); printf("%d\n", caller(&factorial2, atoi("10"))); } Its asm without mergefunc: _D5temp310factorial1FiZi: subl $4, %esp movl %eax, (%esp) movl (%esp), %eax cmpl $0, %eax jne .LBB1_2 movl $1, %eax addl $4, %esp ret .LBB1_2: movl (%esp), %eax subl $1, %eax call _D5temp310factorial1FiZi movl %eax, %ecx movl (%esp), %eax imull %ecx, %eax addl $4, %esp ret _D5temp310factorial2FiZi: subl $4, %esp movl %eax, (%esp) movl (%esp), %eax cmpl $0, %eax jne .LBB2_2 movl $1, %eax addl $4, %esp ret .LBB2_2: movl (%esp), %eax subl $1, %eax call _D5temp310factorial2FiZi movl %eax, %ecx movl (%esp), %eax imull %ecx, %eax addl $4, %esp ret _D5temp36callerFPFiZiiZi: subl $12, %esp movl 16(%esp), %ecx movl %ecx, 8(%esp) movl %eax, 4(%esp) movl 8(%esp), %ecx movl 4(%esp), %eax call *%ecx addl $12, %esp ret $4 _Dmain: subl $12, %esp leal .str1, %eax movl %eax, (%esp) call atoi movl $_D5temp310factorial1FiZi, (%esp) call _D5temp36callerFPFiZiiZi subl $4, %esp movl $.str, (%esp) movl %eax, 4(%esp) call printf leal .str3, %eax movl %eax, (%esp) call atoi movl $_D5temp310factorial2FiZi, (%esp) call _D5temp36callerFPFiZiiZi subl $4, %esp movl $.str2, (%esp) movl %eax, 4(%esp) call printf xorl %eax, %eax addl $12, %esp ret $8 Its asm with mergefunc: _D5temp310factorial1FiZi: subl $4, %esp movl %eax, (%esp) testl %eax, %eax jne .LBB1_2 movl $1, %eax addl $4, %esp ret .LBB1_2: movl (%esp), %eax decl %eax call _D5temp310factorial1FiZi imull (%esp), %eax addl $4, %esp ret _D5temp310factorial2FiZi: subl $4, %esp call _D5temp310factorial1FiZi addl $4, %esp ret _D5temp36callerFPFiZiiZi: subl $12, %esp movl 16(%esp), %ecx movl %ecx, 8(%esp) movl %eax, 4(%esp) call *8(%esp) addl $12, %esp ret $4 .size _D5temp36callerFPFiZiiZi, .-_D5temp36callerFPFiZiiZi _Dmain: subl $12, %esp movl $.str1, (%esp) call atoi movl $_D5temp310factorial1FiZi, (%esp) call _D5temp36callerFPFiZiiZi subl $4, %esp movl %eax, 4(%esp) movl $.str, (%esp) call printf movl $.str3, (%esp) call atoi movl $_D5temp310factorial2FiZi, (%esp) call _D5temp36callerFPFiZiiZi subl $4, %esp movl %eax, 4(%esp) movl $.str2, (%esp) call printf xorl %eax, %eax addl $12, %esp ret $8 llvm is not able to replace all function pointers factorial2 with the ones to factorial1, so turns factorial2 into a stump that just calls the factorial1. In C/D you can compare function pointers so in this case LLVM has done what it can. But can't here llvm replace factorial2 with just a jump instruction? I am not expert enough yet to answer this. If someone has an answer for me I'd like to hear it :-) Bye, bearophile
Mar 26 2010