pythonctypes

How can I access the pointer values passed to and returned by C functions from Python?


Can my python code have access to the actual pointer values received and returned by C functions called through ctypes?

If yes, how could I achieve that ?


I'd like to test the pointer values passed to and returned from a shared library function to test an assignment with pytest (here, to test that strdup didn't return the same pointer but a new pointer to a different address).

I've wrapped one of the functions to implement (strdup) in a new C function in a file named wrapped_strdup.c to display the pointer values and memory areas contents:

/*
** I'm compiling this into a .so the following way:
**   - gcc -o wrapped_strdup.o -c wrapped_strdup.c
**   - ar rc wrapped_strdup.a wrapped_strdup.o
**   - ranlib wrapped_strdup.a
**   - gcc -shared -o wrapped_strdup.so -Wl,--whole-archive wrapped_strdup.a -Wl,--no-whole-archive
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *wrapped_strdup(char *src){
    char *dst;

    printf("From C:\n");
    printf("- src address: %X, src content: [%s].\n", src, src);
    dst = strdup(src);
    printf("- dst address: %X, dst content: [%s].\n", dst, dst);
    return dst;
}

I also create in the same directory a pytest test file named test_strdup.py:

#!/usr/bin/env python3

import ctypes
import pytest

# Setting wrapped_strdup:
# Load the shared library from the current directory and bind the C function.
lib_wrapped_strdup = ctypes.cdll.LoadLibrary("./wrapped_strdup.so")
wrapped_strdup = lib_wrapped_strdup.wrapped_strdup
# A c_char_p result is handed back as a Python bytes object, so the C pointer
# that the function returned is not available to the caller.
wrapped_strdup.restype = ctypes.c_char_p
wrapped_strdup.argtypes = [ctypes.c_char_p]

@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup(src: bytes):
    """Duplicate ``src`` through the C wrapper and compare Python-side ids.

    Prints ``id()`` and ``repr()`` of the argument and of the returned bytes
    object, then asserts that both hold the same content and that their ids
    differ.
    """
    print("")
    dst = wrapped_strdup(src)

    # Build the whole Python-side report, then emit it line by line.
    report = (
        "From Python:",
        f"- src address: {hex(id(src))}, src content: [{src!r}].",
        f"- dst address: {hex(id(dst))}, dst content: [{dst!r}].",
    )
    for line in report:
        print(line)

    assert src == dst
    assert hex(id(src)) != hex(id(dst))

Then, running my test gives me the following output:

$ pytest test_strdup.py --maxfail=2 -v -s
=================================== test session starts ====================================
platform linux -- Python 3.12.5, pytest-8.3.2, pluggy-1.5.0 -- /usr/bin/python
cachedir: .pytest_cache
rootdir: /home/vmonteco/code/MREs/MRe_strdup_test_with_ctypes
plugins: anyio-4.4.0, cov-5.0.0, typeguard-4.3.0
collected 2 items                                                                          

test_strdup.py::test_strdup[] 
From C:
- src address: C19BDBE8, src content: [].
- dst address: 5977DFA0, dst content: [].
From Python:
- src address: 0x75bcc19bdbc8, src content: [b''].
- dst address: 0x75bcc19bdbc8, dst content: [b''].
FAILED
test_strdup.py::test_strdup[foo] 
From C:
- src address: BF00A990, src content: [foo].
- dst address: 59791030, dst content: [foo].
From Python:
- src address: 0x75bcbf00a970, src content: [b'foo'].
- dst address: 0x75bcbefc18f0, dst content: [b'foo'].
PASSED

========================================= FAILURES =========================================
______________________________________ test_strdup[] _______________________________________

src = b''

    @pytest.mark.parametrize("src", [b"", b"foo"])
    def test_strdup(src: bytes):
        print("")
        dst = wrapped_strdup(src)
    
        print("From Python:")
        print(f"- src address: {hex(id(src))}, src content: [{src!r}].")
        print(f"- dst address: {hex(id(dst))}, dst content: [{dst!r}].")
    
        assert src == dst
>       assert hex(id(src)) != hex(id(dst))
E       AssertionError: assert '0x75bcc19bdbc8' != '0x75bcc19bdbc8'
E        +  where '0x75bcc19bdbc8' = hex(129453562518472)
E        +    where 129453562518472 = id(b'')
E        +  and   '0x75bcc19bdbc8' = hex(129453562518472)
E        +    where 129453562518472 = id(b'')

test_strdup.py:22: AssertionError
================================= short test summary info ==================================
FAILED test_strdup.py::test_strdup[] - AssertionError: assert '0x75bcc19bdbc8' != '0x75bcc19bdbc8'
=============================== 1 failed, 1 passed in 0.04s ================================

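This output shows two things:

- The addresses printed from Python are not the ones the C function saw: id() gives the address of the Python bytes object, not the C pointer that was passed or returned (compare 0x75bcc19bdbc8 with the C19BDBE8 printed from C, which is itself only the low 32 bits of the real pointer because of %X).
- For b"", id(src) and id(dst) are identical even though the C function returned a different address than the one it received (5977DFA0 vs. C19BDBE8), so the assertion fails for a correct strdup.

So the above attempt is actually unreliable for checking that a function returned a pointer to a different area.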


I could also try to retrieve the pointer value itself and make a second test run for checking this part specifically by changing the restype attribute :

#!/usr/bin/env python3                                                                      

import ctypes
import pytest

# Setting wrapped_strdup:
# Load the shared library from the current directory and bind the C function.
lib_wrapped_strdup = ctypes.cdll.LoadLibrary("./wrapped_strdup.so")
wrapped_strdup = lib_wrapped_strdup.wrapped_strdup
# With c_void_p the returned C address comes back as a Python integer.
wrapped_strdup.restype = ctypes.c_void_p   # Note that it's not a c_char_p anymore.
wrapped_strdup.argtypes = [ctypes.c_char_p]

@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup_for_pointers(src: bytes):
    """Call the C wrapper and print the address it returned, in hexadecimal."""
    print("")
    returned_address = wrapped_strdup(src)

    print("From Python:")
    # The address is formatted only after the header line has been printed.
    formatted_address = hex(returned_address)
    print(f"- retrieved dst address: {formatted_address}.")

The above gives the following output :

$ pytest test_strdup_for_pointers.py --maxfail=2 -v -s
=================================== test session starts ====================================
platform linux -- Python 3.12.5, pytest-8.3.2, pluggy-1.5.0 -- /usr/bin/python
cachedir: .pytest_cache
rootdir: /home/vmonteco/code/MREs/MRe_strdup_test_with_ctypes
plugins: anyio-4.4.0, cov-5.0.0, typeguard-4.3.0
collected 2 items                                                                          

test_strdup_for_pointers.py::test_strdup_for_pointers[] 
From C:
- src address: E15BDBE8, src content: [].
- dst address: 84D4D820, dst content: [].
From Python:
- retrieved dst address: 0x608984d4d820.
PASSED
test_strdup_for_pointers.py::test_strdup_for_pointers[foo] 
From C:
- src address: DEC7EA80, src content: [foo].
- dst address: 84EA7C40, dst content: [foo].
From Python:
- retrieved dst address: 0x608984ea7c40.
PASSED

==================================== 2 passed in 0.01s =====================================

Which would give the actual address (or at least something that looks related).

But without knowing the value the C function receives, it's not of much help.


Addendum: what I came up with from Mark's answer (and that works):

Here's a test file that implements both solutions suggested in the accepted answer:

#!/usr/bin/env python3

import ctypes
import pytest

# Setting libc:
# strlen() from the C library is what the tests use to measure the
# NUL-terminated strings before slicing them.
libc = ctypes.cdll.LoadLibrary("libc.so.6")
strlen = libc.strlen
strlen.restype = ctypes.c_size_t
strlen.argtypes = (ctypes.c_char_p,)

# Setting wrapped_strdup:
# Load the shared library from the current directory and bind the C function.
lib_wrapped_strdup = ctypes.cdll.LoadLibrary("./wrapped_strdup.so")
wrapped_strdup = lib_wrapped_strdup.wrapped_strdup
# Restype will be set directly in the tests.
# (test_strdup uses POINTER(c_char), test_strdup_alternative uses c_void_p.)
wrapped_strdup.argtypes = (ctypes.c_char_p,)


@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup(src: bytes):
    """Check wrapped_strdup() using a ``POINTER(c_char)`` return type.

    The source is placed in a ctypes string buffer so that its C address can
    be read, the wrapper is called on that buffer, and the returned pointer
    is compared with the source by content (must match) and by address (must
    differ). Both sides are printed once the assertions have passed.
    """
    print("")  # Just to make pytest output more readable.

    # Ask ctypes for a real pointer object as the result.
    char_pointer = ctypes.POINTER(ctypes.c_char)
    wrapped_strdup.restype = char_pointer

    # Source side: buffer, length up to the NUL terminator, address, content.
    src_buffer = ctypes.create_string_buffer(src)
    src_length = strlen(src_buffer)
    src_addr = ctypes.addressof(src_buffer)
    src_content = src_buffer[:src_length]

    # Call under test.
    dst = wrapped_strdup(src_buffer)

    # Destination side: the address of what the pointer points to, and the
    # bytes up to the NUL terminator.
    dst_addr = ctypes.addressof(dst.contents)
    dst_length = strlen(dst)
    dst_content = dst[:dst_length]

    # Same content, different memory.
    assert src_content == dst_content
    assert src_addr != dst_addr

    print("From Python:")
    print(f"- Src content: {src_content!r}. Src address: {src_addr:X}.")
    print(f"- Dst content: {dst_content!r}. Dst address: {dst_addr:X}.")


@pytest.mark.parametrize("src", [b"", b"foo"])
def test_strdup_alternative(src: bytes):
    """Check wrapped_strdup() using a ``c_void_p`` return type.

    The returned address is received as a Python integer, compared with the
    address of the source buffer, and cast to ``POINTER(c_char)`` to read the
    duplicated content back. Content must match and addresses must differ.
    """
    print("")  # Just to make pytest output more readable.

    # Set expected result type.
    wrapped_strdup.restype = ctypes.c_void_p

    # Create the src buffer and retrieve its address.
    src_buffer = ctypes.create_string_buffer(src)
    src_addr = ctypes.addressof(src_buffer)
    src_content = src_buffer[:strlen(src_buffer)]

    # Run function to test.
    dst = wrapped_strdup(src_buffer)

    # With a c_void_p restype a NULL result comes back as None. Fail here in
    # that case: casting None yields a NULL pointer, and calling strlen() on
    # it would crash the test process instead of reporting a failure.
    assert dst is not None, "wrapped_strdup returned NULL"

    # Retrieve result address and content.
    dst_addr = dst
    # cast dst:
    dst_pointer = ctypes.cast(dst, ctypes.POINTER(ctypes.c_char))
    dst_content = dst_pointer[:strlen(dst_pointer)]

    # Assertions.
    assert src_content == dst_content
    assert src_addr != dst_addr

    # Output.
    print("From Python:")
    print(f"- Src content: {src_content!r}. Src address: {src_addr:X}.")
    print(f"- Dst content: {dst_content!r}. Dst address: {dst_addr:X}.")

Output :

$ pytest test_strdup.py -v -s            
=============================== test session starts ===============================
platform linux -- Python 3.10.14, pytest-8.3.2, pluggy-1.5.0 -- /home/vmonteco/.pyenv/versions/3.10.14/envs/strduo_test/bin/python3.10
cachedir: .pytest_cache
rootdir: /home/vmonteco/code/MREs/MRe_strdup_test_with_ctypes
plugins: anyio-4.4.0, stub-1.1.0
collected 4 items                                                                 

test_strdup.py::test_strdup[] 
From C:
- src address: 661BBE90, src content: [].
- dst address: F5D8A7A0, dst content: [].
From Python:
- Src content: b''. Src address: 7C39661BBE90.
- Dst content: b''. Dst address: 57B4F5D8A7A0.
PASSED
test_strdup.py::test_strdup[foo] 
From C:
- src address: 661BBE90, src content: [foo].
- dst address: F5E03340, dst content: [foo].
From Python:
- Src content: b'foo'. Src address: 7C39661BBE90.
- Dst content: b'foo'. Dst address: 57B4F5E03340.
PASSED
test_strdup.py::test_strdup_alternative[] 
From C:
- src address: 661BBE90, src content: [].
- dst address: F5B0AC50, dst content: [].
From Python:
- Src content: b''. Src address: 7C39661BBE90.
- Dst content: b''. Dst address: 57B4F5B0AC50.
PASSED
test_strdup.py::test_strdup_alternative[foo] 
From C:
- src address: 661BBE90, src content: [foo].
- dst address: F5BF9C20, dst content: [foo].
From Python:
- Src content: b'foo'. Src address: 7C39661BBE90.
- Dst content: b'foo'. Dst address: 57B4F5BF9C20.
PASSED

================================ 4 passed in 0.01s ================================

Solution

  • A return type of ctypes.c_char_p is "helpful" and converts the return value to a Python string, losing the actual C pointer. Use ctypes.POINTER(ctypes.c_char) to keep the pointer.

    A return type of ctypes.c_void_p is also "helpful" and converts the returned C address to a Python integer, but it can be cast to a more specific pointer type to access the data at the address.

    To find its address, use ctypes.addressof on the contents of the pointer; otherwise you get the address of the storage of the pointer.

    I use char* strcpy(char* dest, const char* src) as an example because the returned pointer is the same address as the dest pointer and it shows the C addresses are the same from Python without needing a C helper function.

    In the code below the mutable string buffer dest has the same address as the return value and a few ways to examine the C address of the return value are shown:

    import ctypes as ct
    
    # Demonstration: read the C address returned by strcpy() in several ways.
    # NOTE(review): 'msvcrt' is presumably the Windows C runtime; the question
    # loads "libc.so.6" for the same purpose on Linux.
    dll = ct.CDLL('msvcrt')
    
    dll.strcpy.argtypes = ct.c_char_p, ct.c_char_p
    dll.strcpy.restype = ct.POINTER(ct.c_char)  # NOT ct.c_char_p, so the pointer is kept
    dll.strlen.argtypes = ct.c_char_p,
    dll.strlen.restype = ct.c_size_t
    
    dest = ct.create_string_buffer(10)  # writable char buffer
    print(f'{ct.addressof(dest) = :#x}')  # its C address, before the call
    result = dll.strcpy(dest, b'abcdefg')
    
    # Note that for strcpy, the returned address is the same as the dest address
    print(f'{ct.addressof(dest) = :#x}')  # dest array's C address
    print(f'{ct.addressof(result.contents) = :#x}')  # address the result points to (same as dest)
    n = dll.strlen(result)
    print(f'{result[:n] = }')  # a char* must be sliced, otherwise only one character is printed
    print(f'{dest.value = }')  # an array has .value (stops at the nul terminator) or .raw
    
    dll.strcpy.restype = ct.c_void_p  # alternative: get the pointer address as a Python int
    result = dll.strcpy(dest, b'abcdefg')
    print(f'{result = :#x}')  # same C address as above
    p = ct.cast(result, ct.POINTER(ct.c_char))  # cast afterward to reach the data
    print(f'{ct.addressof(p.contents) = :#x}')  # same C address
    n = dll.strlen(p)
    print(f'{p[:n] = }')  # a char* must be sliced
    
    p = ct.cast(result, ct.POINTER(ct.c_char * n))  # alternative: pointer to a sized array
    print(f'{p.contents.value = }')  # no slicing needed, char (*)[n] has known size n
    

    Output:

    ct.addressof(dest) = 0x1abe80ea398
    ct.addressof(dest) = 0x1abe80ea398
    ct.addressof(result.contents) = 0x1abe80ea398
    result[:n] = b'abcdefg'
    dest.value = b'abcdefg'
    result = 0x1abe80ea398
    ct.addressof(p.contents) = 0x1abe80ea398
    p[:n] = b'abcdefg'
    p.contents.value = b'abcdefg'