clinuxmultithreadingsystem-calls

Creating a new thread with clone3 causes segfault


I'm experimenting with the Linux clone3() syscall (introduced in Linux 5.3) to create threads without using the standard library. While there are many examples of using the older clone() syscall, I haven't found clear examples of using clone3() for thread creation. What I'm trying to do:

Create a thread using the clone3() syscall directly Implement this without libc (-nostdlib) Have the thread execute a simple function that prints a message.

Current behavior: The program crashes with SIGSEGV after printing "Created". The strace output shows:

execve("./clone3", ["./clone3"], 0x7ffc16f11ca0 /* 43 vars */) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN|MAP_STACK, -1, 0) = 0x7156c433d000
clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND, exit_signal=0, stack=0x7156c433d000, stack_size=0x2000}, 88) = 9973
write(1, "C", 1C)                        = 1
write(1, "r", 1r)                        = 1
write(1, "e", 1e)                        = 1
write(1, "a", 1a)                        = 1
write(1, "t", 1t)                        = 1
write(1, "e", 1e)                        = 1
write(1, "d", 1d)                        = 1
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x20040114d} ---
+++ killed by SIGSEGV (core dumped) +++
fish: Job 1, 'strace ./clone3' terminated by signal SIGSEGV (Address boundary error)

Code:

#define _GNU_SOURCE

#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/time.h>
#include <stddef.h>
#include <sys/syscall.h>

typedef int (*thread_fn)(void *arg);

static inline long syscall6(long n, long a1, long a2, long a3, long a4, long a5,
                            long a6) {
  register long rax __asm__("rax") = n;
  register long rdi __asm__("rdi") = a1;
  register long rsi __asm__("rsi") = a2;
  register long rdx __asm__("rdx") = a3;
  register long r10 __asm__("r10") = a4;
  register long r8 __asm__("r8") = a5;
  register long r9 __asm__("r9") = a6;

  __asm__ volatile("syscall"
                   : "+r"(rax)
                   : "r"(rdi), "r"(rsi), "r"(rdx), "r"(r10), "r"(r8), "r"(r9)
                   : "rcx", "r11", "memory");
  return rax;
}
static inline long mmap(void *addr, size_t length, int prot, int flags, int fd,
                        long offset) {
  return syscall6(SYS_mmap, (long)addr, (long)length, (long)prot, (long)flags,
                  (long)fd, offset);
}
static inline void exit(int code) {
  syscall6(SYS_exit_group, code, 0, 0, 0, 0, 0);
}
static inline void sleep_seconds(unsigned int seconds) {
  struct timespec ts = {.tv_sec = seconds, .tv_nsec = 0};
  syscall6(SYS_nanosleep, (long)&ts, 0, 0, 0, 0, 0);
}
static inline void write_str(const char *str) {
  while (*str) {
    syscall6(SYS_write, 1, (long)str, 1, 0, 0, 0);
    str++;
  }
}
static inline long clone3(struct clone_args *args) {
  register long rax __asm__("rax") = SYS_clone3;
  register long rdi __asm__("rdi") = (long)args;
  register long rsi __asm__("rsi") = (long)sizeof(struct clone_args);
  __asm__ volatile("syscall"
                   : "+r"(rax)
                   : "r"(rdi), "r"(rsi)
                   : "r11", "memory");

  if (rax < 0) {
    write_str("clone3 was unsuccessful!");
    exit(1);
  }

  return rax;
}

int thread_func(void *arg) {
  write_str("Hello from thread!\n");
  sleep_seconds(1);
  return 0;
}

__attribute__((noreturn)) void _start() {
  const unsigned long STACK_SIZE = 8192;
  void *stack = (void *)mmap(
      0, STACK_SIZE, PROT_READ | PROT_WRITE,
      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK | MAP_GROWSDOWN, -1, 0);

  void *stack_top = stack + STACK_SIZE;
  unsigned long *sp = (unsigned long *)stack_top;
  *(--sp) = 0;
  *(--sp) = (unsigned long)thread_func;

  struct clone_args args = {0};
  args.flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND;
  args.stack = (unsigned long)stack;
  args.stack_size = STACK_SIZE;

  long tid = clone3(&args);
  if (tid > 0) {
    write_str("Created thread!\n");
    sleep_seconds(2);
  }

  exit(0);
  __builtin_unreachable();
}

Compiled with: gcc clone3.c -static -nostdlib -fno-stack-protector -o clone3

Linux Kernel: 6.11.6

I've reviewed the kernel documentation and man pages but couldn't find specific examples of using clone3() for thread creation. Any help in understanding what I'm missing would be appreciated.


Solution

  • I found work project from a Chris Wellons. Read his article:https://nullprogram.com/blog/2023/03/23/ and show his a Source. I compiler his source and it is a work ( gcc src.c -static -nostdlib -fno-stack-protector -o run). my kernel Linux viva 6.11.6-arch1-1

    Edit 1: I can create clone3 :)) . Show my code on github https://github.com/oditynet/syscall You have problem in func sleep_seconds.

    strace output:

     odity@viva  ~/bin  ./syscall                                                      
    [+] Thread create
    3412
    Created thread!
    ^C
     ✘ odity@viva  ~/bin  strace ./syscall
    execve("./syscall", ["./syscall"], 0x7ffd15a02950 /* 46 vars */) = 0
    write(1, "[", 1[)                        = 1
    write(1, "+", 1+)                        = 1
    write(1, "]", 1])                        = 1
    write(1, " ", 1 )                        = 1
    write(1, "T", 1T)                        = 1
    write(1, "h", 1h)                        = 1
    write(1, "r", 1r)                        = 1
    write(1, "e", 1e)                        = 1
    write(1, "a", 1a)                        = 1
    write(1, "d", 1d)                        = 1
    write(1, " ", 1 )                        = 1
    write(1, "c", 1c)                        = 1
    write(1, "r", 1r)                        = 1
    write(1, "e", 1e)                        = 1
    write(1, "a", 1a)                        = 1
    write(1, "t", 1t)                        = 1
    write(1, "e", 1e)                        = 1
    write(1, "\n", 1
    )                       = 1
    mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN|MAP_STACK, -1, 0) = 0x7ce303ee6000
    clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND, exit_signal=0, stack=0x7ce303ee6000, stack_size=0x2000}, 880) = 3494
    
    Eexit_group(0)                           = ?
    +++ exited with 0 +++