While playing around with the code below I noticed that a network namespace can be kept alive with neither a process attached to it nor any "direct" reference by an open file descriptor to the nsfs inode.
How can I discover the network namespace an open (RT)NETLINK socket refers to, given only the NETLINK socket?
Running the following code with go run -exec sudo main.go
will first create a new network namespace in a separate OS-locked goroutine. The same thread will then open a NETLINK socket for RTNETLINK in this new network namespace. The file descriptor is passed back to the initial (and also locked) thread/goroutine and the separate goroutine/thread terminates. This leaves the process with a set of threads (tasks) that are all attached to the original network namespace, that is, the one the process was started in. There is no longer any thread directly attached to the new network namespace, nor any direct namespace fd reference.
In the end, only the NETLINK/RTNETLINK socket is left, and it references the new network namespace only indirectly. Yet, the network namespace is kept alive, as can be seen from the fact that on each query every 30s the code can still successfully list the network interfaces in the new network namespace.
package main
import (
"runtime"
"strings"
"syscall"
"time"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
// creator unshares into a fresh network namespace, opens an RTNETLINK handle
// while inside it and passes that handle back over the handle channel. It
// then blocks until done is closed and returns, so that its OS-level thread
// no longer keeps the new network namespace attached.
//
// This function must be called on its own goroutine.
func creator(done <-chan struct{}, handle chan<- *netlink.Handle) {
	println("creator thread:", syscall.Gettid())

	// unshare(2) affects only the calling thread, so pin this goroutine to its
	// current OS-level thread before calling it. The lock is deliberately
	// never released: once this goroutine returns, the still-locked thread is
	// terminated rather than handed back for reuse.
	runtime.LockOSThread()
	println("creator: locked OS-level thread")

	if unshareErr := syscall.Unshare(syscall.CLONE_NEWNET); unshareErr != nil {
		panic("cannot create and enter new network namespace: " + unshareErr.Error())
	}
	println("creator: in new network namespace")

	// Opened after the unshare, so the socket is created in the new namespace.
	rtnl, openErr := netlink.NewHandle(unix.NETLINK_ROUTE)
	if openErr != nil {
		panic("cannot open RTNETLINK connection: " + openErr.Error())
	}

	println("creator: sending RTNETLINK handle")
	handle <- rtnl
	println("creator: handle sent")

	<-done // blocks until the done channel gets closed.
	println("creator: falling off")
	// Returning here ends the goroutine together with its locked thread.
}
// main starts creator on a separate goroutine, takes over the RTNETLINK
// handle it sends back, tells creator to finish and then, every 30 seconds and
// forever, lists the names of the network interfaces visible through that
// handle.
func main() {
	runtime.LockOSThread()
	println("main thread:", syscall.Gettid())

	stop := make(chan struct{})
	handles := make(chan *netlink.Handle)
	go creator(stop, handles)

	rtnl := <-handles
	println("received RTNETLINK handle")

	println("telling creator to stop")
	close(stop)

	// Poll through the handle; the pause happens after each round.
	for ; ; time.Sleep(30 * time.Second) {
		links, err := rtnl.LinkList()
		if err != nil {
			panic("RTNETLINK failed: " + err.Error())
		}
		names := make([]string, len(links))
		for i := range links {
			names[i] = links[i].Attrs().Name
		}
		println("nifs found:", strings.Join(names, ", "))
	}
}
The clue to a less heavy-handed solution (than kprobes) is somewhat hidden inside user313992's answer to "How to get the Linux network namespace for a tap/tun device referenced in /proc/[PID]/fdinfo/[FD]?": the (at this time new) ioctl SIOCGSKNS. This can be applied to (not only) NETLINK sockets, where the socket fds can be duplicated across processes and then successfully queried using the SIOCGSKNS ioctl.
The proof-of-concept code (that relies on some ugly unsafe reflection "foodoo", thanks to How to access unexported struct fields?):
package main
import (
"os"
"reflect"
"runtime"
"strconv"
"strings"
"syscall"
"time"
"unsafe"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// creator unshares into a fresh network namespace, prints that namespace's
// inode number, opens an RTNETLINK handle while inside it and passes the
// handle back over the handle channel. It then blocks until done is closed and
// returns, so that its OS-level thread no longer keeps the new network
// namespace attached.
//
// This function must be called on its own goroutine.
func creator(done <-chan struct{}, handle chan<- *netlink.Handle) {
	println("creator thread:", syscall.Gettid())

	// unshare(2) affects only the calling thread, so pin this goroutine to its
	// current OS-level thread before calling it. The lock is deliberately
	// never released: once this goroutine returns, the still-locked thread is
	// terminated rather than handed back for reuse.
	runtime.LockOSThread()
	println("creator: locked OS-level thread")

	if unshareErr := syscall.Unshare(syscall.CLONE_NEWNET); unshareErr != nil {
		panic("cannot create and enter new network namespace: " + unshareErr.Error())
	}

	// thread-self (not self) is used so the lookup refers to this very
	// thread's namespace.
	nsInfo, statErr := os.Stat("/proc/thread-self/ns/net")
	if statErr != nil {
		panic("cannot determine task's new network namespace: " + statErr.Error())
	}
	nsIno := nsInfo.Sys().(*syscall.Stat_t).Ino
	println("creator: in new network namespace net:[" + strconv.FormatUint(nsIno, 10) + "]")

	// Opened after the unshare, so the socket is created in the new namespace.
	rtnl, openErr := netlink.NewHandle(unix.NETLINK_ROUTE)
	if openErr != nil {
		panic("cannot open RTNETLINK connection: " + openErr.Error())
	}

	println("creator: sending RTNETLINK handle")
	handle <- rtnl
	println("creator: handle sent")

	<-done // blocks until the done channel gets closed.
	println("creator: falling off")
	// Returning here ends the goroutine together with its locked thread.
}
// main starts creator on a separate goroutine and takes over the RTNETLINK
// handle it sends back. It prints the inode of the network namespace main
// itself is attached to, then asks the kernel (ioctl SIOCGSKNS) which network
// namespace each socket of the received handle belongs to and prints that
// inode as well. Finally it tells creator to finish and, every 30 seconds and
// forever, lists the names of the network interfaces visible through the
// handle.
func main() {
	runtime.LockOSThread()
	println("main thread:", syscall.Gettid())
	done := make(chan struct{})
	handle := make(chan *netlink.Handle)
	go creator(done, handle)
	nlHandle := <-handle
	println("received RTNETLINK handle")
	netnslnk, err := os.Stat("/proc/thread-self/ns/net")
	if err != nil {
		panic("cannot determine main task's network namespace: " + err.Error())
	}
	println("main is still in network namespace net:[" +
		strconv.FormatUint(netnslnk.Sys().(*syscall.Stat_t).Ino, 10) + "]")

	// The handle's socket map and each socket's fd are unexported fields, so
	// they are read via reflect.NewAt on the field's address, which yields a
	// value that is allowed to be passed to Interface().
	rNlhandle := reflect.ValueOf(nlHandle).Elem().FieldByName("sockets")
	rNlhandle = reflect.NewAt(rNlhandle.Type(), unsafe.Pointer(rNlhandle.UnsafeAddr())).Elem()
	for _, sock := range rNlhandle.Interface().(map[int]*nl.SocketHandle) {
		rSocket := reflect.ValueOf(sock.Socket).Elem().FieldByName("fd")
		rSocket = reflect.NewAt(rSocket.Type(), unsafe.Pointer(rSocket.UnsafeAddr())).Elem()
		fd := rSocket.Interface().(int32)

		// SIOCGSKNS returns a new file descriptor referring to the network
		// namespace the socket was created in.
		netnsfd, err := unix.IoctlRetInt(int(fd), unix.SIOCGSKNS)
		if err != nil {
			panic("cannot query netns fd of RTNETLINK fd: " + err.Error())
		}
		var stat unix.Stat_t
		if err := unix.Fstat(netnsfd, &stat); err != nil {
			panic("cannot stat netns fd: " + err.Error())
		}
		// Only the inode number is needed. Close the namespace fd right away:
		// leaving it open would leak an fd and would also be a direct
		// reference to the namespace, so the socket would no longer be the
		// only thing left referencing it.
		if err := unix.Close(netnsfd); err != nil {
			panic("cannot close netns fd: " + err.Error())
		}
		println("received netlink socket is connected to net:[" +
			strconv.FormatUint(stat.Ino, 10) + "]")
	}

	println("telling creator to stop")
	close(done)
	for {
		links, err := nlHandle.LinkList()
		if err != nil {
			panic("RTNETLINK failed: " + err.Error())
		}
		var names []string
		for _, link := range links {
			names = append(names, link.Attrs().Name)
		}
		println("nifs found:", strings.Join(names, ", "))
		time.Sleep(30 * time.Second)
	}
}
The output should be similar to:
go run -exec sudo .
main thread: 60063
creator thread: 60067
creator: locked OS-level thread
creator: in new network namespace net:[4026532257]
creator: sending RTNETLINK handle
creator: handle sent
received RTNETLINK handle
main is still in network namespace net:[4026531840]
received netlink socket is connected to net:[4026532257]
telling creator to stop
creator: falling off
nifs found: lo