Can my Python code access the actual pointer values received and returned by C functions called through ctypes?
If yes, how can I achieve that?
I'd like to check the pointer values passed to and returned from a shared library function, in order to test an assignment with pytest (here, to check that strdup didn't return the same pointer but a new pointer to a different address).
I've wrapped one of the functions to implement (strdup) in a new C function, in a file named wrapped_strdup.c, to display the pointer values and the contents of the memory areas:
/*
** I'm compiling this into a .so the following way:
** - gcc -o wrapped_strdup.o -c wrapped_strdup.c
** - ar rc wrapped_strdup.a wrapped_strdup.o
** - ranlib wrapped_strdup.a
** - gcc -shared -o wrapped_strdup.so -Wl,--whole-archive wrapped_strdup.a -Wl,--no-whole-archive
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *wrapped_strdup(char *src){
char *dst;
printf("From C:\n");
printf("- src address: %X, src content: [%s].\n", src, src);
dst = strdup(src);
printf("- dst address: %X, dst content: [%s].\n", dst, dst);
return dst;
}
I also created, in the same directory, a pytest test file named test_strdup.py:
#!/usr/bin/env python3
import ctypes
import pytest
# Load the shared object and declare the prototype of wrapped_strdup:
# it takes a C string and its result is converted as a C string too.
lib_wrapped_strdup = ctypes.CDLL("./wrapped_strdup.so")
wrapped_strdup = lib_wrapped_strdup.wrapped_strdup
wrapped_strdup.argtypes = [ctypes.c_char_p]
wrapped_strdup.restype = ctypes.c_char_p
@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup(src: bytes):
    """Call wrapped_strdup and compare input and result by value and by id().

    The result reaches Python as a bytes object, so the values printed and
    compared here are the id() of Python objects.
    """
    print("")
    dst = wrapped_strdup(src)

    src_id = hex(id(src))
    dst_id = hex(id(dst))

    print("From Python:")
    print(f"- src address: {src_id}, src content: [{src!r}].")
    print(f"- dst address: {dst_id}, dst content: [{dst!r}].")

    assert src == dst
    assert src_id != dst_id
Then, running my test gives me the following output:
$ pytest test_strdup.py --maxfail=2 -v -s
=================================== test session starts ====================================
platform linux -- Python 3.12.5, pytest-8.3.2, pluggy-1.5.0 -- /usr/bin/python
cachedir: .pytest_cache
rootdir: /home/vmonteco/code/MREs/MRe_strdup_test_with_ctypes
plugins: anyio-4.4.0, cov-5.0.0, typeguard-4.3.0
collected 2 items
test_strdup.py::test_strdup[]
From C:
- src address: C19BDBE8, src content: [].
- dst address: 5977DFA0, dst content: [].
From Python:
- src address: 0x75bcc19bdbc8, src content: [b''].
- dst address: 0x75bcc19bdbc8, dst content: [b''].
FAILED
test_strdup.py::test_strdup[foo]
From C:
- src address: BF00A990, src content: [foo].
- dst address: 59791030, dst content: [foo].
From Python:
- src address: 0x75bcbf00a970, src content: [b'foo'].
- dst address: 0x75bcbefc18f0, dst content: [b'foo'].
PASSED
========================================= FAILURES =========================================
______________________________________ test_strdup[] _______________________________________
src = b''
@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup(src: bytes):
print("")
dst = wrapped_strdup(src)
print("From Python:")
print(f"- src address: {hex(id(src))}, src content: [{src!r}].")
print(f"- dst address: {hex(id(dst))}, dst content: [{dst!r}].")
assert src == dst
> assert hex(id(src)) != hex(id(dst))
E AssertionError: assert '0x75bcc19bdbc8' != '0x75bcc19bdbc8'
E + where '0x75bcc19bdbc8' = hex(129453562518472)
E + where 129453562518472 = id(b'')
E + and '0x75bcc19bdbc8' = hex(129453562518472)
E + where 129453562518472 = id(b'')
test_strdup.py:22: AssertionError
================================= short test summary info ==================================
FAILED test_strdup.py::test_strdup[] - AssertionError: assert '0x75bcc19bdbc8' != '0x75bcc19bdbc8'
=============================== 1 failed, 1 passed in 0.04s ================================
This output shows two things:
- The two b'' objects in Python are identical either way (that's the same object), despite the addresses being different from the lower-level perspective. This is consistent with some pure Python tests, and I guess it could be some optimization feature.
- The addresses printed from C and the values reported in Python for the dst and src variables don't actually seem related.
So the above attempt is actually unreliable for checking that a function returned a pointer to a different area.
I could also try to retrieve the pointer value itself, and do a second test run to check this part specifically, by changing the restype attribute:
#!/usr/bin/env python3
import ctypes
import pytest
# Load the shared object and declare the prototype of wrapped_strdup.
lib_wrapped_strdup = ctypes.CDLL("./wrapped_strdup.so")
wrapped_strdup = lib_wrapped_strdup.wrapped_strdup
wrapped_strdup.argtypes = [ctypes.c_char_p]
# Note that the result type is c_void_p here, not c_char_p anymore.
wrapped_strdup.restype = ctypes.c_void_p
@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup_for_pointers(src: bytes):
    """Print, in hexadecimal, the value wrapped_strdup returns to Python."""
    print("")
    dst_addr = wrapped_strdup(src)
    print("From Python:")
    print(f"- retrieved dst address: {dst_addr:#x}.")
The above gives the following output :
$ pytest test_strdup_for_pointers.py --maxfail=2 -v -s
=================================== test session starts ====================================
platform linux -- Python 3.12.5, pytest-8.3.2, pluggy-1.5.0 -- /usr/bin/python
cachedir: .pytest_cache
rootdir: /home/vmonteco/code/MREs/MRe_strdup_test_with_ctypes
plugins: anyio-4.4.0, cov-5.0.0, typeguard-4.3.0
collected 2 items
test_strdup_for_pointers.py::test_strdup_for_pointers[]
From C:
- src address: E15BDBE8, src content: [].
- dst address: 84D4D820, dst content: [].
From Python:
- retrieved dst address: 0x608984d4d820.
PASSED
test_strdup_for_pointers.py::test_strdup_for_pointers[foo]
From C:
- src address: DEC7EA80, src content: [foo].
- dst address: 84EA7C40, dst content: [foo].
From Python:
- retrieved dst address: 0x608984ea7c40.
PASSED
==================================== 2 passed in 0.01s =====================================
This gives the actual address (or at least something that looks related).
But without knowing the value the C function receives, it's not of much help.
Here's a test that implements both solutions suggested in the accepted answer:
#!/usr/bin/env python3
import ctypes
import pytest
# C library, from which strlen is taken:
libc = ctypes.CDLL("libc.so.6")
strlen = libc.strlen
strlen.argtypes = (ctypes.c_char_p,)
strlen.restype = ctypes.c_size_t

# Shared object under test. Only argtypes is declared here: each test
# assigns the restype it needs.
lib_wrapped_strdup = ctypes.CDLL("./wrapped_strdup.so")
wrapped_strdup = lib_wrapped_strdup.wrapped_strdup
wrapped_strdup.argtypes = (ctypes.c_char_p,)
@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup(src: bytes):
    """Check that wrapped_strdup returns an equal string at a different address.

    The result type is POINTER(c_char), so ctypes hands back a pointer object
    whose target address can be read with ctypes.addressof.
    """
    print("")  # Just to make pytest output more readable.

    # Set expected result type.
    wrapped_strdup.restype = ctypes.POINTER(ctypes.c_char)

    # Create the src buffer and retrieve its address.
    src_buffer = ctypes.create_string_buffer(src)
    src_addr = ctypes.addressof(src_buffer)
    src_content = src_buffer[: strlen(src_buffer)]

    # Run function to test.
    dst = wrapped_strdup(src_buffer)

    # A NULL result is a falsy pointer object: fail here with a clear message
    # rather than on the ValueError that dst.contents raises for NULL.
    assert dst, "wrapped_strdup returned NULL"

    # Retrieve result address and content.
    # addressof(dst.contents) is the address dst points to.
    dst_addr = ctypes.addressof(dst.contents)
    dst_content = dst[: strlen(dst)]

    # Assertions.
    assert src_content == dst_content
    assert src_addr != dst_addr

    # Output.
    print("From Python:")
    print(f"- Src content: {src_content!r}. Src address: {src_addr:X}.")
    print(f"- Dst content: {dst_content!r}. Dst address: {dst_addr:X}.")
@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup_alternative(src: bytes):
    """Same check as test_strdup, reading the result as a plain address.

    The result type is c_void_p, so ctypes returns the address as a Python
    int (or None for NULL); it is cast to a char pointer to read the content.
    """
    print("")  # Just to make pytest output more readable.

    # Set expected result type.
    wrapped_strdup.restype = ctypes.c_void_p

    # Create the src buffer and retrieve its address.
    src_buffer = ctypes.create_string_buffer(src)
    src_addr = ctypes.addressof(src_buffer)
    src_content = src_buffer[: strlen(src_buffer)]

    # Run function to test.
    dst = wrapped_strdup(src_buffer)

    # ctypes returns None for a NULL c_void_p result: stop here instead of
    # handing a NULL pointer to strlen below, which would crash the process.
    assert dst is not None, "wrapped_strdup returned NULL"

    # Retrieve result address and content.
    dst_addr = dst
    # Cast the integer address to a char pointer to be able to read from it.
    dst_pointer = ctypes.cast(dst, ctypes.POINTER(ctypes.c_char))
    dst_content = dst_pointer[: strlen(dst_pointer)]

    # Assertions.
    assert src_content == dst_content
    assert src_addr != dst_addr

    # Output.
    print("From Python:")
    print(f"- Src content: {src_content!r}. Src address: {src_addr:X}.")
    print(f"- Dst content: {dst_content!r}. Dst address: {dst_addr:X}.")
Output :
$ pytest test_strdup.py -v -s
=============================== test session starts ===============================
platform linux -- Python 3.10.14, pytest-8.3.2, pluggy-1.5.0 -- /home/vmonteco/.pyenv/versions/3.10.14/envs/strduo_test/bin/python3.10
cachedir: .pytest_cache
rootdir: /home/vmonteco/code/MREs/MRe_strdup_test_with_ctypes
plugins: anyio-4.4.0, stub-1.1.0
collected 4 items
test_strdup.py::test_strdup[]
From C:
- src address: 661BBE90, src content: [].
- dst address: F5D8A7A0, dst content: [].
From Python:
- Src content: b''. Src address: 7C39661BBE90.
- Dst content: b''. Dst address: 57B4F5D8A7A0.
PASSED
test_strdup.py::test_strdup[foo]
From C:
- src address: 661BBE90, src content: [foo].
- dst address: F5E03340, dst content: [foo].
From Python:
- Src content: b'foo'. Src address: 7C39661BBE90.
- Dst content: b'foo'. Dst address: 57B4F5E03340.
PASSED
test_strdup.py::test_strdup_alternative[]
From C:
- src address: 661BBE90, src content: [].
- dst address: F5B0AC50, dst content: [].
From Python:
- Src content: b''. Src address: 7C39661BBE90.
- Dst content: b''. Dst address: 57B4F5B0AC50.
PASSED
test_strdup.py::test_strdup_alternative[foo]
From C:
- src address: 661BBE90, src content: [foo].
- dst address: F5BF9C20, dst content: [foo].
From Python:
- Src content: b'foo'. Src address: 7C39661BBE90.
- Dst content: b'foo'. Dst address: 57B4F5BF9C20.
PASSED
================================ 4 passed in 0.01s ================================
A return type of ctypes.c_char_p is "helpful" and converts the return value to a Python bytes object, losing the actual C pointer. Use ctypes.POINTER(ctypes.c_char) to keep the pointer.
A return type of ctypes.c_void_p is also "helpful" and converts the returned C address to a Python integer, but it can be cast to a more specific pointer type to access the data at that address.
To find the address a pointer points to, use ctypes.addressof on the contents of the pointer; otherwise you get the address of the storage of the pointer itself.
I use char* strcpy(char* dest, const char* src) as an example because the returned pointer is the same address as the dest pointer, which shows from Python that the C addresses are the same without needing a C helper function.
In the code below, the mutable string buffer dest has the same address as the return value, and a few ways to examine the C address of the return value are shown:
import ctypes as ct
# Demonstrates two ways of reading the C address behind a ctypes return
# value, using strcpy so that the returned address can be compared with the
# address of the dest buffer.
dll = ct.CDLL('msvcrt')  # Microsoft C runtime library
dll.strcpy.argtypes = ct.c_char_p, ct.c_char_p
dll.strcpy.restype = ct.POINTER(ct.c_char)  # NOT ct.c_char_p to keep pointer
dll.strlen.argtypes = ct.c_char_p,
dll.strlen.restype = ct.c_size_t

# --- Way 1: restype is POINTER(c_char), the result is a pointer object ---
dest = ct.create_string_buffer(10)  # writable char buffer
print(f'{ct.addressof(dest) = :#x}')  # its C address
result = dll.strcpy(dest, b'abcdefg')
# Note that for strcpy, returned address is the same as dest address
print(f'{ct.addressof(dest) = :#x}')  # dest array's C address
print(f'{ct.addressof(result.contents) = :#x}')  # result pointer's C address (same as dest)
n = dll.strlen(result)  # length of the string, used to slice the pointer
print(f'{result[:n] = }')  # must slice char* or only prints one character.
print(f'{dest.value = }')  # array has .value (nul-termination) or .raw

# --- Way 2: restype is c_void_p, the result is the address as an int ---
dll.strcpy.restype = ct.c_void_p  # alternative, get the pointer address as Python int
result = dll.strcpy(dest, b'abcdefg')
print(f'{result = :#x}')  # same C address as above
p = ct.cast(result, ct.POINTER(ct.c_char))  # cast afterward to read the data
print(f'{ct.addressof(p.contents) = :#x}')  # same C address
n = dll.strlen(p)
print(f'{p[:n] = }')  # must slice char*
p = ct.cast(result, ct.POINTER(ct.c_char * n))  # alternate, pointer to sized array
print(f'{p.contents.value = }')  # don't have to slice, (char*)[n] has known size n
Output:
ct.addressof(dest) = 0x1abe80ea398
ct.addressof(dest) = 0x1abe80ea398
ct.addressof(result.contents) = 0x1abe80ea398
result[:n] = b'abcdefg'
dest.value = b'abcdefg'
result = 0x1abe80ea398
ct.addressof(p.contents) = 0x1abe80ea398
p[:n] = b'abcdefg'
p.contents.value = b'abcdefg'