I'm using this component http://sourceforge.net/projects/tponguard/ and now I need to compile it for 64-bit. I'm stuck on this assembly routine.
It was like this:
//XOR a run of bytes from Mem2 into Mem1, in place (32-bit version).
//Per the comments below, on entry eax = Mem1, edx = Mem2, ecx = byte count.
push esi //preserve esi; restored at @Done
push edi //preserve edi; restored at @Done
mov esi, eax //esi = Mem1
mov edi, edx //edi = Mem2
push ecx //save byte count
shr ecx, 2 //convert to dwords
jz @Continue //fewer than 4 bytes: skip the dword loop
cld //NOTE(review): no string instruction appears in this block, so this looks unnecessary
@Loop1: //xor dwords at a time
mov eax, [edi] //load 4 bytes from Mem2
xor [esi], eax //xor them into Mem1
add esi, 4
add edi, 4
dec ecx
jnz @Loop1
@Continue: //handle remaining bytes (3 or less)
pop ecx //restore the saved byte count
and ecx, 3 //keep only the low two bits: bytes left after the dword loop
jz @Done
@Loop2: //xor remaining bytes
mov al, [edi]
xor [esi], al
inc esi
inc edi
dec ecx
jnz @Loop2
@Done:
pop edi
pop esi
And I changed it to this:
//Attempted 64-bit port of the routine above: same instructions with the register names widened.
//NOTE(review): the arguments are still read from rax/rdx/rcx exactly as the 32-bit code did -- confirm this matches the calling convention of the 64-bit target.
push rsi //preserve rsi; restored at @Done
push rdi //preserve rdi; restored at @Done
mov rsi, rax //rsi = Mem1
mov rdi, rdx //rdi = Mem2
push rcx //save byte count
shr rcx, 2 //convert to dwords
jz @Continue
cld
@Loop1: //xor dwords at a time
mov rax, [rdi] //NOTE(review): rax is a 64-bit register, so this loads 8 bytes although the loop advances by 4
xor [rsi], rax //NOTE(review): writes 8 bytes; on the last dword this reaches 4 bytes past the range counted by rcx
add rsi, 4
add rdi, 4
dec rcx
jnz @Loop1
@Continue: //handle remaining bytes (3 or less)
pop rcx //restore the saved byte count
and rcx, 3 //keep only the low two bits: bytes left after the dword loop
jz @Done
@Loop2: //xor remaining bytes
mov al, [rdi]
xor [rsi], al
inc rsi
inc rdi
dec rcx
jnz @Loop2
@Done:
pop rdi
pop rsi
But now I get an Access Violation at xor [rsi], rax
The function you are looking at is
procedure XorMem(var Mem1; const Mem2; Count : Cardinal); register;
from the ogutil unit.
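Personally I would not bother converting this to x64 assembler. There are quite a few tricky details that you need to get right in order to do so: for example, the 64-bit Windows calling convention passes the parameters in different registers than the 32-bit register convention does, and loading into a 64-bit register such as rax reads 8 bytes even though your pointers still advance by only 4. It makes more sense to me to port to Pascal and let the compiler deal with the details. The simplest, most naive translation looks like this: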
{ XORs Count bytes of Mem2 into Mem1 in place, one byte at a time. }
procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
var
  Dst, Src: PByte;
  Remaining: Cardinal;
begin
  Dst := @Mem1;
  Src := @Mem2;
  // Counting down from Count to 1 runs the body exactly Count times,
  // and not at all when Count is zero.
  for Remaining := Count downto 1 do
  begin
    Dst^ := Dst^ xor Src^;
    Inc(Dst);
    Inc(Src);
  end;
end;
If this is performance critical then you'd want to unroll the loop a little to operate on larger operands: say, 32-bit operands on x86 and 64-bit operands on x64.
A version that operated on 32 bit operands might look like this:
{ XORs Count bytes of Mem2 into Mem1 in place.
  The bulk is processed as 32-bit words; the 0..3 bytes left over
  are processed one byte at a time. }
procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
var
  Dst, Src: PByte;
  DWords, Tail: Cardinal;
begin
  Dst := @Mem1;
  Src := @Mem2;
  DWords := Count shr 2; // number of whole 4-byte words
  Tail := Count and 3;   // bytes remaining after the whole words

  while DWords <> 0 do
  begin
    PCardinal(Dst)^ := PCardinal(Dst)^ xor PCardinal(Src)^;
    Inc(Dst, SizeOf(Cardinal));
    Inc(Src, SizeOf(Cardinal));
    Dec(DWords);
  end;

  while Tail <> 0 do
  begin
    Dst^ := Dst^ xor Src^;
    Inc(Dst);
    Inc(Src);
    Dec(Tail);
  end;
end;
Actually, you can easily enough write a version that automatically uses 32-bit or 64-bit operands as determined by the compilation target. The trick is to use the NativeUInt type, which is the size of a machine word.
{ XORs Count bytes of Mem2 into Mem1 in place.
  Works in NativeUInt-sized chunks while at least one full chunk
  remains, then finishes the leftover bytes one at a time. }
procedure XorMem(var Mem1; const Mem2; Count: Cardinal);
const
  WordSize = SizeOf(NativeUInt); // chunk size follows the compilation target
var
  Dst, Src: PByte;
begin
  Dst := @Mem1;
  Src := @Mem2;

  while Count >= WordSize do
  begin
    PNativeUInt(Dst)^ := PNativeUInt(Dst)^ xor PNativeUInt(Src)^;
    Inc(Dst, WordSize);
    Inc(Src, WordSize);
    Dec(Count, WordSize);
  end;

  while Count <> 0 do
  begin
    Dst^ := Dst^ xor Src^;
    Inc(Dst);
    Inc(Src);
    Dec(Count);
  end;
end;
This final version is pretty efficient when compiled with optimisations enabled. I would not look beyond that final Pascal version.