I try to make encryption / descryption in C language using Intel's AES New Instruction Set, to be more specific I try to do 256 bit AES with CBC mode.
I found C code at Intel's white paper here: https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
This is code from Intel's white paper pdf (aes.h
):
#ifndef AES_H
#define AES_H
#include <stdio.h>
#include <stdint.h> //for int8_t
#include <string.h> //for memcmp
#include <wmmintrin.h> //for intrinsics for AES-NI
//compile using gcc and following arguments: -g;-O0;-Wall;-msse2;-msse;-march=native;-maes
#define ARRAY_SIZE(array) (sizeof(array) / sizeof(unsigned char))
inline void KEY_256_ASSIST_1(__m128i* temp1, __m128i * temp2)
{
__m128i temp4;
*temp2 = _mm_shuffle_epi32(*temp2, 0xff);
temp4 = _mm_slli_si128 (*temp1, 0x4);
*temp1 = _mm_xor_si128 (*temp1, temp4);
temp4 = _mm_slli_si128 (temp4, 0x4);
*temp1 = _mm_xor_si128 (*temp1, temp4);
temp4 = _mm_slli_si128 (temp4, 0x4);
*temp1 = _mm_xor_si128 (*temp1, temp4);
*temp1 = _mm_xor_si128 (*temp1, *temp2);
}
inline void KEY_256_ASSIST_2(__m128i* temp1, __m128i * temp3)
{
__m128i temp2,temp4;
temp4 = _mm_aeskeygenassist_si128 (*temp1, 0x0);
temp2 = _mm_shuffle_epi32(temp4, 0xaa);
temp4 = _mm_slli_si128 (*temp3, 0x4);
*temp3 = _mm_xor_si128 (*temp3, temp4);
temp4 = _mm_slli_si128 (temp4, 0x4);
*temp3 = _mm_xor_si128 (*temp3, temp4);
temp4 = _mm_slli_si128 (temp4, 0x4);
*temp3 = _mm_xor_si128 (*temp3, temp4);
*temp3 = _mm_xor_si128 (*temp3, temp2);
}
void AES_256_Key_Expansion(const unsigned char *userkey, unsigned char *key)
{
__m128i temp1, temp2, temp3;
__m128i *Key_Schedule = (__m128i*)key;
temp1 = _mm_loadu_si128((__m128i*)userkey);
temp3 = _mm_loadu_si128((__m128i*)(userkey+16));
Key_Schedule[0] = temp1;
Key_Schedule[1] = temp3;
temp2 = _mm_aeskeygenassist_si128 (temp3,0x01);
KEY_256_ASSIST_1(&temp1, &temp2);
Key_Schedule[2]=temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
Key_Schedule[3]=temp3;
temp2 = _mm_aeskeygenassist_si128 (temp3,0x02);
KEY_256_ASSIST_1(&temp1, &temp2);
Key_Schedule[4]=temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
Key_Schedule[5]=temp3;
temp2 = _mm_aeskeygenassist_si128 (temp3,0x04);
KEY_256_ASSIST_1(&temp1, &temp2);
Key_Schedule[6]=temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
Key_Schedule[7]=temp3;
temp2 = _mm_aeskeygenassist_si128 (temp3,0x08);
KEY_256_ASSIST_1(&temp1, &temp2);
Key_Schedule[8]=temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
Key_Schedule[9]=temp3;
temp2 = _mm_aeskeygenassist_si128 (temp3,0x10);
KEY_256_ASSIST_1(&temp1, &temp2);
Key_Schedule[10]=temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
Key_Schedule[11]=temp3;
temp2 = _mm_aeskeygenassist_si128 (temp3,0x20);
KEY_256_ASSIST_1(&temp1, &temp2);
Key_Schedule[12]=temp1;
KEY_256_ASSIST_2(&temp1, &temp3);
Key_Schedule[13]=temp3;
temp2 = _mm_aeskeygenassist_si128 (temp3,0x40);
KEY_256_ASSIST_1(&temp1, &temp2);
Key_Schedule[14]=temp1;
}
void AES_CBC_encrypt(const unsigned char *in, unsigned char *out, unsigned char ivec[16], unsigned long length, unsigned char *key, int number_of_rounds)
{
__m128i feedback,data;
unsigned long i;
int j;
if (length%16)
length = length/16+1;
else length /=16;
feedback=_mm_loadu_si128 ((__m128i*)ivec);
for(i=0; i < length; i++){
data = _mm_loadu_si128 (&((__m128i*)in)[i]);
feedback = _mm_xor_si128 (data,feedback);
feedback = _mm_xor_si128 (feedback,((__m128i*)key)[0]);
for(j=1; j < number_of_rounds; j++)
{
feedback = _mm_aesenc_si128 (feedback,((__m128i*)key)[j]);
}
feedback = _mm_aesenclast_si128 (feedback,((__m128i*)key)[j]);
_mm_storeu_si128 (&((__m128i*)out)[i],feedback);
}
}
void AES_CBC_decrypt(const unsigned char *in, unsigned char *out, unsigned char ivec[16], unsigned long length, unsigned char *key, int number_of_rounds)
{
__m128i data,feedback,last_in;
unsigned long i;
int j;
if (length%16)
length = length/16+1;
else length /=16;
feedback=_mm_loadu_si128 ((__m128i*)ivec);
for(i=0; i < length; i++)
{
last_in=_mm_loadu_si128 (&((__m128i*)in)[i]);
data = _mm_xor_si128 (last_in,((__m128i*)key)[0]);
for(j=1; j < number_of_rounds; j++)
{
data = _mm_aesdec_si128 (data,((__m128i*)key)[j]);
}
data = _mm_aesdeclast_si128 (data,((__m128i*)key)[j]);
data = _mm_xor_si128 (data,feedback);
_mm_storeu_si128 (&((__m128i*)out)[i],data);
feedback=last_in;
}
}
#endif // AES_H
main.cpp
:
#include <iostream>
#include "aes.h"
using namespace std;
int main()
{
unsigned char ot[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x0, 0x0, 0x0, 0x0, 0x0 };
/*
* File
* 68656C6C6F20776F726C640A00000000 - ot
* 3AC1757A0C4FE816202D39BFBE74D680 - key
*/
unsigned char iv[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
unsigned char key[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f };
unsigned char computed_cipher[16];
unsigned char computed_plain[16];
unsigned char key_schedule[16 * 16];
int size = 0;
AES_256_Key_Expansion(key, key_schedule);
AES_CBC_encrypt(ot, computed_cipher, iv, 16, key_schedule, 14);
size = ARRAY_SIZE(ot);
for (int i = 0; i < size; i++) {
printf("%02X", ot[i]);
}
printf("\n");
size = ARRAY_SIZE(computed_cipher);
for (int i = 0; i < size; i++) {
printf("%02X", computed_cipher[i]);
}
printf("\n");
AES_CBC_decrypt(computed_cipher, computed_plain, iv, 16, key_schedule, 14);
size = ARRAY_SIZE(computed_plain);
for (int i = 0; i < size; i++) {
printf("%02X", computed_plain[i]);
}
printf("\n");
return 0;
}
Encryption works right, I checked result with some online AES tools, but decrypted text is wrong, decrypted text should be content from variable ot
, but I get something like AE516492B1B62F2C7D1FCB906B11AE25
. Variable computed_plain
should be ot
plain text (original). Where can be problem?
You’ve used the same key expansion for encryption and decryption, but they’re not the same. See AES_set_decrypt_key
on page 48.
A shorter demonstration:
unsigned char key_schedule_decrypt[15 * 16];
((__m128i*)key_schedule_decrypt)[0] = ((__m128i const*)key_schedule)[14];
for (int j = 1; j < 14; j++) {
((__m128i*)key_schedule_decrypt)[j] = _mm_aesimc_si128(((__m128i const*)key_schedule)[14 - j]);
}
((__m128i*)key_schedule_decrypt)[14] = ((__m128i const*)key_schedule)[0];
AES_CBC_decrypt(computed_cipher, computed_plain, iv, 16, key_schedule_decrypt, 14);