I'm trying to do basic hooking by locating sys_call_table
and modifying its entry for the sys_read
syscall to point to a function in my own kernel module. I have already tried kprobes; I'm just interested in doing it via sys_call_table.
Below is my code:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/syscalls.h>
#include <linux/version.h>
/* Signature of a syscall handler that receives its arguments packed in a pt_regs.
 * Not referenced anywhere else in the visible source. */
typedef asmlinkage long (*t_syscall)(const struct pt_regs *);
/* CR0 value captured with read_cr0() during init. protect_memory() writes it
 * back as-is; unprotect_memory() writes it with bit 0x00010000 cleared. */
unsigned long cr0;
/* Address of the kernel's sys_call_table, resolved at init via find_sys_call_table(). */
unsigned long **__sys_call_table;
/* Function-pointer type matching kallsyms_lookup_name(). */
typedef unsigned long (*kallsyms_lookup_name_t)(const char *name);
/* Handler type for getdents64. Not referenced anywhere else in the visible source. */
typedef asmlinkage int (*orig_getdents64_t)(unsigned int,
struct linux_dirent64 *, unsigned int);
/* Value read from sys_call_table[__NR_read] during init; hooked_syscall() calls through it. */
asmlinkage long (*original_syscall)(const struct pt_regs *);
/* Probe on kallsyms_lookup_name: once registered, kp.addr holds that function's address. */
static struct kprobe kp = {
.symbol_name = "kallsyms_lookup_name"
};
/* Pointer to kallsyms_lookup_name(), filled in from kp.addr during init. */
static kallsyms_lookup_name_t kallsyms_lookup_name_ptr;
/* Probe on __x64_sys_read; init registers it only to print kp2.addr. */
static struct kprobe kp2 = {
.symbol_name = "__x64_sys_read"
};
/* NOTE(review): declared here but never defined or called in the visible source. */
unsigned long *get_syscall_address(unsigned long *sys_call_table, int syscall_number);
/* Replacement handler that init stores into sys_call_table[__NR_read]. */
asmlinkage long hooked_syscall(const struct pt_regs *regs);
#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 16, 0)
/*
 * Load val into the CR0 register with a raw "mov" instruction instead of
 * going through write_cr0(). Only compiled for kernels newer than 4.16.0.
 *
 * NOTE(review): presumably needed because write_cr0() on newer kernels
 * refuses to clear the write-protect bit -- confirm for the target kernel.
 */
static inline void
write_cr0_forced(unsigned long val)
{
/* Dummy memory operand for the asm below; its value is never used.
 * NOTE(review): looks like it exists only to keep the compiler from
 * reordering the CR0 write -- confirm. */
unsigned long __force_order;
asm volatile(
"mov %0, %%cr0"
: "+r"(val), "+m"(__force_order));
}
#endif
/*
 * Lift write protection so that the caller can store into sys_call_table.
 *
 * x86 / x86_64: writes the saved global cr0 value with bit 16 (0x00010000,
 * the CR0.WP write-protect bit) cleared. Kernels newer than 4.16.0 use
 * write_cr0_forced(), older ones use write_cr0().
 *
 * ARM64: remaps the range beginning at start_rodata with PAGE_KERNEL.
 * NOTE(review): start_rodata, section_size and update_mapping_prot are not
 * declared anywhere in the visible source, so this branch cannot build as-is.
 *
 * The cleared bit stays cleared until protect_memory() is called.
 */
static inline void
unprotect_memory(void)
{
#if IS_ENABLED(CONFIG_X86) || IS_ENABLED(CONFIG_X86_64)
#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 16, 0)
write_cr0_forced(cr0 & ~0x00010000);
#else
write_cr0(cr0 & ~0x00010000);
#endif
#elif IS_ENABLED(CONFIG_ARM64)
update_mapping_prot(__pa_symbol(start_rodata), (unsigned long)start_rodata,
section_size, PAGE_KERNEL);
#endif
}
/*
 * Counterpart of unprotect_memory(): put write protection back.
 *
 * x86 / x86_64: writes the saved global cr0 value back unmodified, using
 * write_cr0_forced() on kernels newer than 4.16.0 and write_cr0() otherwise.
 *
 * ARM64: remaps the range beginning at start_rodata with PAGE_KERNEL_RO.
 * NOTE(review): start_rodata, section_size and update_mapping_prot are not
 * declared anywhere in the visible source, so this branch cannot build as-is.
 */
static inline void
protect_memory(void)
{
#if IS_ENABLED(CONFIG_X86) || IS_ENABLED(CONFIG_X86_64)
#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 16, 0)
write_cr0_forced(cr0);
#else
write_cr0(cr0);
#endif
#elif IS_ENABLED(CONFIG_ARM64)
update_mapping_prot(__pa_symbol(start_rodata), (unsigned long)start_rodata,
section_size, PAGE_KERNEL_RO);
#endif
}
/*
 * Replacement handler for the __NR_read slot of sys_call_table.
 *
 * Logs that the hook was reached and then forwards the untouched register
 * set to original_syscall, returning whatever that handler returns.
 *
 * @regs: register set passed in by the syscall entry code.
 */
asmlinkage long hooked_syscall(const struct pt_regs *regs)
{
	/*
	 * read() is issued constantly, including by whatever process drains
	 * the kernel log, so an unthrottled printk here would flood the log
	 * and feed back on itself. Rate-limit the message instead.
	 */
	printk_ratelimited(KERN_INFO "Syscall hooked!\n");
	return original_syscall(regs);
}
/*
 * Look up the "sys_call_table" symbol through kallsyms_lookup_name_ptr and
 * hand the result back as a pointer to the table. The lookup result is
 * returned unchanged apart from the cast; the caller treats a NULL result
 * as "not found".
 */
static unsigned long **find_sys_call_table(void)
{
	return (unsigned long **)kallsyms_lookup_name_ptr("sys_call_table");
}
static int __init kprobe_init(void)
{
int ret;
cr0 = read_cr0();
ret = register_kprobe(&kp);
if (ret < 0)
return ret;
kallsyms_lookup_name_ptr = (kallsyms_lookup_name_t)kp.addr;
__sys_call_table = find_sys_call_table();
if (!__sys_call_table) {
printk(KERN_ERR "Couldn't find sys_call_table.\n");
return -1;
}
printk("__sys_call_table address : %px\n", __sys_call_table);
unprotect_memory();
original_syscall = (void *)__sys_call_table[__NR_read];
printk("__NR_READ : %px\n", original_syscall);
printk("HOOKED FUNCTION : %px\n", (unsigned long *)hooked_syscall);
__sys_call_table[__NR_read] = (unsigned long *)hooked_syscall;
/// Double check
original_syscall = (void *)__sys_call_table[__NR_read];
printk("__NR_READ : %px\n", original_syscall);
protect_memory();
// Extra check
int ret2 = register_kprobe(&kp2);
if (ret2 < 0)
return ret2;
printk("%px\n", kp2.addr);
unregister_kprobe(&kp);
unregister_kprobe(&kp2);
return 0;
}
static void __exit kprobe_exit(void)
{
}
/* Register kprobe_init() and kprobe_exit() as the module's load and unload hooks. */
module_init(kprobe_init)
module_exit(kprobe_exit)
/* Declare the module's license. */
MODULE_LICENSE("GPL");
and the Makefile,
# Name of the kernel module: sct.c is compiled into sct.o and linked as sct.ko.
# A single-source module needs no "<name>-objs" list; such a list would have to
# be named after the module and contain .o files, never .c files.
obj-m += sct.o

# Path to the kernel build tree of the running kernel
KDIR := /lib/modules/$(shell uname -r)/build

.PHONY: all clean

# Recipe lines must start with a tab. $(MAKE) is used instead of a literal
# "make" so that flags and the jobserver are passed on to the kernel build.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
I get the address of kallsyms_lookup_name()
by installing a kprobe and, after registering it, reading its .addr
field. Once I have the address of sys_call_table
I can read the
address of the sys_read
syscall. I checked that address by grepping /proc/kallsyms
and it seems I got the right one.
Then I change the __NR_read
entry to a function in my lkm. I have some debug prints afterward and I can confirm that the sys_call_table
entry has changed.
/* Excerpt from kprobe_init(): patch the __NR_read slot and print its value before and after. */
printk("__sys_call_table address : %px\n", __sys_call_table);
unprotect_memory();
/* Read the current handler so hooked_syscall() can forward to it. */
original_syscall = (void *)__sys_call_table[__NR_read];
printk("__NR_READ : %px\n", original_syscall);
printk("HOOKED FUNCTION : %px\n", (unsigned long *)hooked_syscall);
__sys_call_table[__NR_read] = (unsigned long *)hooked_syscall;
/// Double check
/* NOTE(review): the slot now holds hooked_syscall, so this re-read replaces
 * the saved handler in original_syscall with the hook's own address. */
original_syscall = (void *)__sys_call_table[__NR_read];
printk("__NR_READ : %px\n", original_syscall);
Unfortunately, after modifying sys_call_table
entry I don't get any printk showing in dmesg, or any crash or anything!
To do extra checking, I installed a kprobe on sys_read
and got the addr
but even after modifying sys_call_table
the kprobe still shows the original address of sys_read
.
I'm on Ubuntu 24.04, 6.8.0-35-generic
. I also tried Ubuntu 22.04 but I got the same result! Both with stock kernel with default configuration. Tried in VMware VM as well as physical hardware.
I searched a bit to see if any security mechanism might cause issues with this, but couldn't find anything :(
It's pretty confusing for me why my modification to sys_call_table
doesn't seem to take effect.
Can you please tell me what I am missing here? Is hooking via sys_call_table
still a thing?
I'm new and learning different Linux kernel features, so I need to know whether sys_call_table
modification is still a viable way to hook a syscall.
I tried to include enough information to help reproducing the same result.
Surprise, surprise! You cannot do this anymore since Linux v6.9. Commit 1e3ad78334a69b36e107232e337f9d693dcc9df2 introduced a security mitigation against speculative execution on x86 that completely removed the use of syscall tables, which has been backported to v6.8.5+, v6.6.26+, v6.1.85+, v5.15.154+.
Ubuntu 24.04 uses the v6.8 stable branch, and Ubuntu 22.04 uses the v6.1 stable branch, so the patch is present there too. The same goes for Debian and Debian-based distros like Kali. Most major Linux distributions also incorporated this change as they simply follow the stable kernel branch.
The sys_call_table
symbol still exists and still contains valid function pointers, but it is only used for tracing purposes (CONFIG_FTRACE_SYSCALLS=y
). The actual syscall dispatch code is now implemented as a huge inlined switch
case (source):
/* Turn one (number, symbol) pair into a case label that calls __x64_<sym>
 * directly by name and returns its result -- no function pointer is read. */
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
/*
 * Dispatch syscall number nr to its handler, passing regs through.
 * Numbers with no matching case fall to __x64_sys_ni_syscall().
 */
long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
{
switch (nr) {
/* Presumably a generated list of __SYSCALL(nr, sym) lines, one per
 * syscall, which the macro above turns into cases -- confirm in the kernel tree. */
#include <asm/syscalls_64.h>
default: return __x64_sys_ni_syscall(regs);
}
};
I see you already mention you tried kprobes (the real solution) so I assume you know how to use those. I'm just going to leave this here for whoever comes across this post and might find it useful. Using kprobes is significantly easier than doing things manually and in a "dirty" way by editing sys_call_table
.
In order to find the appropriate symbol to hook you can take a look at the kernel symbols directly with readelf -s
and grep for the syscall name you are interested in. Usually, they are prefixed with an arch-specific prefix. In case of x86 it's __x64_sys_
for 64-bit syscalls.
I also maintain syscalls.mebeim.net where you can find a list of syscall symbol names for various architectures and kernel versions, which you may find useful.
Here's an example of how this could be done:
#include <linux/kprobes.h>
#include <linux/ptrace.h>
// ...
/*
 * Handler installed as the .pre_handler of syscall_kprobe, i.e. the callback
 * for the probe placed on __x64_sys_read.
 *
 * @p:    the kprobe that was hit.
 * @regs: kernel registers at the probe point, not the userspace registers;
 *        on x86 the pointer to the userspace struct pt_regs is the first
 *        function argument, found in regs->di.
 *
 * Currently a stub that does nothing and returns 0.
 */
static int sys_read_kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
// Do something here...
return 0;
}
/* Probe description: attach to the symbol __x64_sys_read and call
 * sys_read_kprobe_pre_handler() as its pre-handler. */
struct kprobe syscall_kprobe = {
.symbol_name = "__x64_sys_read",
.pre_handler = sys_read_kprobe_pre_handler,
};
/*
 * Module load hook: register syscall_kprobe.
 *
 * Returns 0 when the probe was registered; otherwise logs the failure and
 * returns the error code reported by register_kprobe().
 */
static int __init my_module_init(void)
{
	const int rc = register_kprobe(&syscall_kprobe);

	if (rc != 0)
		pr_err("register_kprobe() failed: %d\n", rc);

	return rc;
}
/* Module unload hook: remove the probe that my_module_init() registered. */
static void __exit my_module_exit(void)
{
unregister_kprobe(&syscall_kprobe);
}
Note: I did not test the above code so don't expect it to run perfectly as is, but you can use it as a starting point.
Important: remember that the .pre_handler
of a kprobe will get the kernel registers in the struct pt_regs
that is passed as the second argument, not the userspace registers. You will have to get the struct pt_regs
holding the userspace registers from the register holding the first function argument (this will be different depending on the architecture; on x86 it's regs->di
for RDI). There are also special cases where the syscall is not defined with a SYSCALL_DEFINEn
macro and the userspace arguments will not be passed as a struct pt_regs
. You should check the kernel sources of whichever syscall you are trying to hook.