cencryptionaesintelaes-ni

Wrong result from decryption using AES New Instruction Set


I try to make encryption / descryption in C language using Intel's AES New Instruction Set, to be more specific I try to do 256 bit AES with CBC mode.

I found C code at Intel's white paper here: https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf

This is code from Intel's white paper pdf (aes.h):

#ifndef AES_H
#define AES_H

#include <stdio.h>
#include <stdint.h>     //for int8_t
#include <string.h>     //for memcmp
#include <wmmintrin.h>  //for intrinsics for AES-NI
//compile using gcc and following arguments: -g;-O0;-Wall;-msse2;-msse;-march=native;-maes

#define ARRAY_SIZE(array) (sizeof(array) / sizeof(unsigned char))

inline void KEY_256_ASSIST_1(__m128i* temp1, __m128i * temp2)
{
    __m128i temp4;
    *temp2 = _mm_shuffle_epi32(*temp2, 0xff);
    temp4 = _mm_slli_si128 (*temp1, 0x4);
    *temp1 = _mm_xor_si128 (*temp1, temp4);
    temp4 = _mm_slli_si128 (temp4, 0x4);
    *temp1 = _mm_xor_si128 (*temp1, temp4);
    temp4 = _mm_slli_si128 (temp4, 0x4);
    *temp1 = _mm_xor_si128 (*temp1, temp4);
    *temp1 = _mm_xor_si128 (*temp1, *temp2);
}

inline void KEY_256_ASSIST_2(__m128i* temp1, __m128i * temp3)
{
    __m128i temp2,temp4;
    temp4 = _mm_aeskeygenassist_si128 (*temp1, 0x0);
    temp2 = _mm_shuffle_epi32(temp4, 0xaa);
    temp4 = _mm_slli_si128 (*temp3, 0x4);
    *temp3 = _mm_xor_si128 (*temp3, temp4);
    temp4 = _mm_slli_si128 (temp4, 0x4);
    *temp3 = _mm_xor_si128 (*temp3, temp4);
    temp4 = _mm_slli_si128 (temp4, 0x4);
    *temp3 = _mm_xor_si128 (*temp3, temp4);
    *temp3 = _mm_xor_si128 (*temp3, temp2);
}

void AES_256_Key_Expansion(const unsigned char *userkey, unsigned char *key)
{
    __m128i temp1, temp2, temp3;
    __m128i *Key_Schedule = (__m128i*)key;
    temp1 = _mm_loadu_si128((__m128i*)userkey);
    temp3 = _mm_loadu_si128((__m128i*)(userkey+16));
    Key_Schedule[0] = temp1;
    Key_Schedule[1] = temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x01);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[2]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[3]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x02);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[4]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[5]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x04);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[6]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[7]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x08);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[8]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[9]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x10);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[10]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[11]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x20);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[12]=temp1;
    KEY_256_ASSIST_2(&temp1, &temp3);
    Key_Schedule[13]=temp3;
    temp2 = _mm_aeskeygenassist_si128 (temp3,0x40);
    KEY_256_ASSIST_1(&temp1, &temp2);
    Key_Schedule[14]=temp1;
}

void AES_CBC_encrypt(const unsigned char *in, unsigned char *out, unsigned char ivec[16], unsigned long length, unsigned char *key, int number_of_rounds)
{
    __m128i feedback,data;
    unsigned long i;
    int j;
    if (length%16)
        length = length/16+1;
    else length /=16;
        feedback=_mm_loadu_si128 ((__m128i*)ivec);
    for(i=0; i < length; i++){
        data = _mm_loadu_si128 (&((__m128i*)in)[i]);
        feedback = _mm_xor_si128 (data,feedback);
        feedback = _mm_xor_si128 (feedback,((__m128i*)key)[0]);
        for(j=1; j < number_of_rounds; j++)
        {
            feedback = _mm_aesenc_si128 (feedback,((__m128i*)key)[j]);
        }
        feedback = _mm_aesenclast_si128 (feedback,((__m128i*)key)[j]);
        _mm_storeu_si128 (&((__m128i*)out)[i],feedback);
    }
}

void AES_CBC_decrypt(const unsigned char *in, unsigned char *out, unsigned char ivec[16], unsigned long length, unsigned char *key, int number_of_rounds)
{
    __m128i data,feedback,last_in;
    unsigned long i;
    int j;

    if (length%16)
        length = length/16+1;
    else length /=16;
        feedback=_mm_loadu_si128 ((__m128i*)ivec);
    for(i=0; i < length; i++)
    {
        last_in=_mm_loadu_si128 (&((__m128i*)in)[i]);
        data = _mm_xor_si128 (last_in,((__m128i*)key)[0]);
        for(j=1; j < number_of_rounds; j++)
        {
            data = _mm_aesdec_si128 (data,((__m128i*)key)[j]);
        }
        data = _mm_aesdeclast_si128 (data,((__m128i*)key)[j]);
        data = _mm_xor_si128 (data,feedback);
        _mm_storeu_si128 (&((__m128i*)out)[i],data);
        feedback=last_in;
    }
}

#endif // AES_H

main.cpp:

#include <iostream>
#include "aes.h"

using namespace std;

int main()
{
    unsigned char ot[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x0, 0x0, 0x0, 0x0, 0x0 };

    /*
     *  File
     *  68656C6C6F20776F726C640A00000000 - ot
     *  3AC1757A0C4FE816202D39BFBE74D680 - key
     */

    unsigned char iv[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

    unsigned char key[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
                            0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f };


    unsigned char computed_cipher[16];
    unsigned char computed_plain[16];
    unsigned char key_schedule[16 * 16];
    int size = 0;

    AES_256_Key_Expansion(key, key_schedule);

    AES_CBC_encrypt(ot, computed_cipher, iv, 16, key_schedule, 14);

    size = ARRAY_SIZE(ot);
    for (int i = 0; i < size; i++) {
        printf("%02X", ot[i]);
    }
    printf("\n");

    size = ARRAY_SIZE(computed_cipher);
    for (int i = 0; i < size; i++) {
        printf("%02X", computed_cipher[i]);
    }
    printf("\n");

    AES_CBC_decrypt(computed_cipher, computed_plain, iv, 16, key_schedule, 14);

    size = ARRAY_SIZE(computed_plain);
    for (int i = 0; i < size; i++) {
        printf("%02X", computed_plain[i]);
    }
    printf("\n");

    return 0;
}

Encryption works right, I checked result with some online AES tools, but decrypted text is wrong, decrypted text should be content from variable ot, but I get something like AE516492B1B62F2C7D1FCB906B11AE25. Variable computed_plain should be ot plain text (original). Where can be problem?


Solution

  • You’ve used the same key expansion for encryption and decryption, but they’re not the same. See AES_set_decrypt_key on page 48.

    A shorter demonstration:

    unsigned char key_schedule_decrypt[15 * 16];
    
    ((__m128i*)key_schedule_decrypt)[0] = ((__m128i const*)key_schedule)[14];
    
    for (int j = 1; j < 14; j++) {
        ((__m128i*)key_schedule_decrypt)[j] = _mm_aesimc_si128(((__m128i const*)key_schedule)[14 - j]);
    }
    
    ((__m128i*)key_schedule_decrypt)[14] = ((__m128i const*)key_schedule)[0];
    
    AES_CBC_decrypt(computed_cipher, computed_plain, iv, 16, key_schedule_decrypt, 14);