Tags: delphi, assembly, 64-bit, access-violation, basm

Why do I get an access violation when porting this assembly code to x64?


I am using this component, http://sourceforge.net/projects/tponguard/, and now I need to compile it for 64-bit. I'm stuck on this piece of assembly.

It was like this:

  // 32-bit body: XORs the bytes read through edi (Mem2) into the bytes at esi (Mem1).
  // On entry, as the moves below show: eax = Mem1, edx = Mem2, ecx = byte count.
  push esi              //esi/edi serve as working pointers; restored at @Done
  push edi

  mov  esi, eax         //esi = Mem1 (modified in place)
  mov  edi, edx         //edi = Mem2 (only read)

  push ecx              //save byte count
  shr  ecx, 2           //convert to dwords
  jz   @Continue        //fewer than 4 bytes: skip the dword loop

  cld                   //clears the direction flag; no string instruction follows in this fragment
@Loop1:                 //xor dwords at a time
  mov  eax, [edi]       //load 4 bytes from Mem2
  xor  [esi], eax       //xor them into Mem1
  add  esi, 4           //advance both pointers by the 4 bytes just processed
  add  edi, 4
  dec  ecx
  jnz  @Loop1

@Continue:              //handle remaining bytes (3 or less)
  pop  ecx              //restore the original byte count
  and  ecx, 3           //keep only the remainder (count mod 4)
  jz   @Done

@Loop2:                 //xor remaining bytes
  mov  al, [edi]
  xor  [esi], al
  inc  esi
  inc  edi
  dec  ecx
  jnz  @Loop2

@Done:
  pop  edi              //restore in reverse order of the pushes above
  pop  esi

And I changed to this:

  // Register-for-register rename of the 32-bit code (eXX -> rXX).
  // NOTE(review): this assumes the arguments still arrive in rax / rdx / rcx.
  // The Win64 calling convention passes the first three integer arguments in
  // rcx, rdx and r8 instead - confirm against the compiler documentation.
  push rsi
  push rdi

  mov  rsi, rax         //rsi = Mem1 only if rax really holds it on entry (see note above)
  mov  rdi, rdx         //rdi = Mem2

  push rcx              //save byte count (only valid if rcx holds Count - see note above)
  shr  rcx, 2           //convert to dwords
  jz   @Continue

  cld
@Loop1:                 //xor dwords at a time
  mov  rax, [rdi]       //NOTE(review): rax is 64 bits wide, so this reads 8 bytes...
  xor  [rsi], rax       //...and this writes 8 bytes, although the pointers advance by only 4
  add  rsi, 4
  add  rdi, 4
  dec  rcx
  jnz  @Loop1

@Continue:              //handle remaining bytes (3 or less)
  pop  rcx
  and  rcx, 3
  jz   @Done

@Loop2:                 //xor remaining bytes
  mov  al, [rdi]
  xor  [rsi], al
  inc  rsi
  inc  rdi
  dec  rcx
  jnz  @Loop2

@Done:
  pop  rdi
  pop  rsi

But now I get an access violation at the instruction `xor [rsi], rax`.


Solution

  • The function you are looking at is

    procedure XorMem(var Mem1; const Mem2; Count : Cardinal); register;
    

    from the ogutil unit.

    Personally I would not bother converting this to x64 assembler. There are quite a few tricky details that you need to get right in order to do so. It makes more sense to me to port to Pascal and let the compiler deal with the details. The simplest most naive translation looks like this:

    // XORs Count bytes starting at Mem2 into the Count bytes starting at Mem1,
    // one byte at a time. Mem1 is modified in place; Mem2 is only read.
    procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
    var
      Dst, Src: PByte;
      Step: Cardinal;
    begin
      Dst := @Mem1;
      Src := @Mem2;
      // The loop body runs exactly Count times (not at all when Count = 0).
      for Step := Count downto 1 do
      begin
        Dst^ := Dst^ xor Src^;
        Inc(Dst);
        Inc(Src);
      end;
    end;
    

    If this is performance critical then you'd want to unroll the loop a little so that it operates on larger operands: say, 32-bit operands on x86 and 64-bit operands on x64.

    A version that operates on 32-bit operands might look like this:

    // XORs Count bytes starting at Mem2 into the Count bytes starting at Mem1.
    // Whole 32-bit words are processed first; the remaining 0..3 bytes are
    // then handled one at a time. Mem1 is modified in place; Mem2 is only read.
    procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
    var
      Dst, Src: PByte;
      Step: Cardinal;
    begin
      Dst := @Mem1;
      Src := @Mem2;

      // Count shr 2 = number of complete 4-byte words.
      for Step := Count shr 2 downto 1 do
      begin
        PCardinal(Dst)^ := PCardinal(Dst)^ xor PCardinal(Src)^;
        Inc(Dst, 4);
        Inc(Src, 4);
      end;

      // Count and 3 = number of bytes left over after the word loop.
      for Step := Count and 3 downto 1 do
      begin
        Dst^ := Dst^ xor Src^;
        Inc(Dst);
        Inc(Src);
      end;
    end;
    

    Actually, you can easily enough write a version that automatically uses 32 or 64 bit operands as determined by the compilation target. The trick is to use the NativeUInt type which is machine word size.

    // XORs Count bytes starting at Mem2 into the Count bytes starting at Mem1.
    // The bulk of the data is processed in NativeUInt-sized words, so the
    // operand width follows the compilation target; the bytes left over are
    // then handled one at a time. Mem1 is modified in place; Mem2 is only read.
    procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
    const
      // Cast keeps all arithmetic below in Cardinal.
      WordSize = Cardinal(SizeOf(NativeUInt));
    var
      Dst, Src: PByte;
      Step: Cardinal;
    begin
      Dst := @Mem1;
      Src := @Mem2;

      // Complete machine words.
      for Step := Count div WordSize downto 1 do
      begin
        PNativeUInt(Dst)^ := PNativeUInt(Dst)^ xor PNativeUInt(Src)^;
        Inc(Dst, WordSize);
        Inc(Src, WordSize);
      end;

      // Trailing bytes (fewer than WordSize of them).
      for Step := Count mod WordSize downto 1 do
      begin
        Dst^ := Dst^ xor Src^;
        Inc(Dst);
        Inc(Src);
      end;
    end;
    

    This final version is pretty efficient when compiled with optimisations enabled. I would not look beyond that final Pascal version.