www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.ldc - struggling with inline assembler

reply "salsa" <salsa nomail.org> writes:
I'm trying to use Intel's AES instruction set for AES encryption. 
The following piece of code works well with DMD2 but won't 
compile with LDC. ldc2 tells me this:

Basic Block in function 
'_D4main48__T21AES_128_KEY_EXPANSIONVAyaa7_656e6372797074Z21AES_128_KEY_EXPANS
ONFNaNbNiNexPhPhZv' 
does not have terminator!
label %endentry
LLVM ERROR: Broken function found, compilation aborted!

Flow control in the asm block might be the problem.

By the way, how could I access arrays (ubyte[]) instead of 
pointers in inline assembly? Couldn't find a single piece of 
documentation...
I preferrably omit the use of 'naked' assembler functions. I 
tried to do it as in biguintx86.d but was confused with the 
calling conventions. Registers are used in reverse order compared 
to the C calling convention, aren't they?

******************

module main;

import std.stdio;
import core.cpuid;


void main(string[] args)
{

	assert(sse2 && aes, "hardware does not support sse2 and aes!");


	// test vectors
	immutable ubyte[16] plaintext = cast(const 
ubyte[])x"6bc1bee22e409f96e93d7e117393172a";
	immutable ubyte[16] ciphertext = cast(const 
ubyte[])x"3ad77bb40d7a3660a89ecaf32466ef97";
	immutable ubyte[16] userKey = cast(const 
ubyte[])x"2b7e151628aed2a6abf7158809cf4f3c";

	ubyte[16*11] keySchedule;	// buffer for key schedule
	AES_128_KEY_EXPANSION!"encrypt"(userKey.ptr, 
keySchedule.ptr);	// initialize encryption key schedule

	ubyte[16] buffer;
	AES_128_ENCRYPT(keySchedule.ptr, plaintext.ptr, buffer.ptr);	// 
encrypt one 128 bit block


	assert(buffer == ciphertext, "aes encryption failed");
	writeln("200 OK");

}

/// AES128 11 round encryption
/// Params:
/// key = 11*16 byte key schedule
/// plain = 16 bytes plaintext
/// ciphertext = at least 16 bytes output buffer
void AES_128_ENCRYPT(in ubyte* key, in ubyte* plain, ubyte* 
ciphertext)
in {
	//assert(key.length == 16*ROUNDS, "invalid key size");
	//assert(plain.length == 16, "invalid input block size");
	//assert(ciphertext.length >= 16, "output buffer too small");
}
body {

	asm {
		mov RDX, key;	// pointer to key schedule
		// load key into XMM0-XMM10
		lddqu XMM0, [RDX+0x00];
		lddqu XMM1, [RDX+0x10];
		lddqu XMM2, [RDX+0x20];
		lddqu XMM3, [RDX+0x30];
		lddqu XMM4, [RDX+0x40];
		lddqu XMM5, [RDX+0x50];
		lddqu XMM6, [RDX+0x60];
		lddqu XMM7, [RDX+0x70];
		lddqu XMM8, [RDX+0x80];
		lddqu XMM9, [RDX+0x90];
		lddqu XMM10, [RDX+0xA0];

		// load plaintext into XMM15

		mov RDX, plain;	// pointer to plaintext
		movdqu XMM15, [RDX];	// read plaintext block

		// AES-128 encryption sequence.
		// The data block is in XMM15.
		// Registers XMM0–XMM10 hold the round keys(from 0 to 10 in 
this order).
		// In the end, XMM15 holds the encryption result.
		pxor XMM15, XMM0; // Whitening step (Round 0)
		aesenc XMM15, XMM1; // Round 1
		aesenc XMM15, XMM2; // Round 2
		aesenc XMM15, XMM3; // Round 3
		aesenc XMM15, XMM4; // Round 4
		aesenc XMM15, XMM5; // Round 5
		aesenc XMM15, XMM6; // Round 6
		aesenc XMM15, XMM7; // Round 7
		aesenc XMM15, XMM8; // Round 8
		aesenc XMM15, XMM9; // Round 9
		aesenclast XMM15, XMM10; // Round 10

		mov RDX, ciphertext;	// pointer to output buffer
		movdqu [RDX], XMM15;	// write processed data to buffer
	}
}


///
/// Expand a 128 bit user key into 11 round keys
///
/// source: 
http://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instruc
ions-set-paper.pdf, 
Figure 19. AES-128 Key Expansion: Outlined Code Example
///
/// Params:
///
/// decrypt = generate decryption key if set to true. default: 
false
///
/// userKey = the AES key as given by the user
/// key = 11 round keys
///
///
enum ROUNDS = 11;
 trusted
public void AES_128_KEY_EXPANSION(string mode = "encrypt")(in 
ubyte* userKey, ubyte* key) nothrow  nogc
	if(mode == "encrypt" || mode == "decrypt")
	in {
		//assertHardwareSupport();
		//assert(userKey.length == 16, "invalid key size");
		//assert(key.length == ROUNDS*16, "invalid key schedule size");
	}
body {
	
	
	asm  {

		mov RDX, userKey; // pointer to user key
		movdqu XMM1, [RDX]; // read user key
		
		xor RCX, RCX; // set index to 0
		
		mov RDX, key;	// pointer to working key
		movdqu [RDX+RCX], XMM1;
		add	RCX, 0x10; // increment by 16 bytes
		
		aeskeygenassist XMM2, XMM1, 0x01;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x02;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x04;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x08;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x10;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x20;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x40;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x80;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x1b;
		call aes_128_assist;
		aeskeygenassist XMM2, XMM1, 0x36;
		call aes_128_assist;
		
	}
	static if(mode == "decrypt") {
		asm {
			// generate inverse key
			call aesimc128;
		}
	} asm {
		
		jmp END;
		
	aes_128_assist:
		pshufd XMM2, XMM2, 0xff;
		
		//vpslldq XMM3, XMM1, 0x4; // vpslldq requires AVX, pslldq 
requires only SSE2
		movdqu XMM3, XMM1;
		pslldq XMM3, 0x4;
		
		pxor XMM1, XMM3;
		
		//vpslldq XMM3, XMM1, 0x4;
		movdqu XMM3, XMM1;
		pslldq XMM3, 0x4;
		
		pxor XMM1, XMM3;
		
		//vpslldq XMM3, XMM1, 0x4;
		movdqu XMM3, XMM1;
		pslldq XMM3, 0x4;
		
		pxor XMM1, XMM3;
		pxor XMM1, XMM2;
		
		mov RDX, key;	// pointer to working key
		movdqu [RDX+RCX], XMM1; // store result in keySchedule
		add RCX, 0x10; // increment index by 16 bytes

		ret; // end of key_expansion_128
		
		//
		// do aesimc for all except the first and the last round key
		//
	aesimc128:
		
		mov RDX, key;			// pointer to key output buffer
		add RDX, 0x10;			// dont modify first key
		
		mov RCX, ROUNDS-2;		// set counter to number of rounds - 2
		
	LOOP:
		movdqu XMM1, [RDX];		// load
		aesimc XMM1, XMM1;		// invert
		movdqu [RDX], XMM1;		// store
		
		add RDX, 0x10;			// increment pointer
		
		loop LOOP;				// loop rounds-2 times
		
		ret;
		
		// end aesimc128
		
	END:
		;
	}
}
Apr 09 2015
parent reply "Kai Nacke" <kai redstar.de> writes:
On Thursday, 9 April 2015 at 15:31:34 UTC, salsa wrote:
 I'm trying to use Intel's AES instruction set for AES 
 encryption. The following piece of code works well with DMD2 
 but won't compile with LDC. ldc2 tells me this:

 Basic Block in function 
 '_D4main48__T21AES_128_KEY_EXPANSIONVAyaa7_656e6372797074Z21AES_128_KEY_EXPANS
ONFNaNbNiNexPhPhZv' 
 does not have terminator!
 label %endentry
 LLVM ERROR: Broken function found, compilation aborted!

 Flow control in the asm block might be the problem.
Hi salsa! The function has several asm { .. } blocks. Jumping between these blocks is not supported by ldc. A possible workaround could be to load the target address into a register and do an indirect call. You could also write a mixin for the aes_128_assist and aesimc128 subroutines and replace the calls with the assembler text. Regards, Kai
Apr 09 2015
parent "salsa" <salsa nomail.org> writes:
Thanks! Replaced the 'static if' with a simple assembler branch.
Apr 09 2015