Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 93 additions & 68 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,27 +239,105 @@ func (c *Container) exec() error {
return c.postStart()
}

// handleFifo listens for either a byte written to the FIFO file or
// the init process exiting. On kernels supporting pidfd_open(2)
// (>= 5.3), it uses a single poll(2) for efficiency. On older kernels,
// it falls back to a polling loop that periodically checks the init
// process's liveness. This function is blocking.
//
// path is the location of the exec FIFO and pid identifies the init
// process to watch. On success the FIFO file is removed; a FIFO that has
// already been removed is not treated as an error.
func handleFifo(path string, pid int) error {
// NOTE(review): blockingFifoOpenCh is never read in this function; it looks
// like a leftover from the earlier channel-based implementation — confirm
// whether this line should still be here.
blockingFifoOpenCh := awaitFifoOpen(path)
// Read-only, non-blocking open of the FIFO.
f, err := os.OpenFile(path, os.O_RDONLY|unix.O_NONBLOCK, 0)
if err != nil {
return fmt.Errorf("exec fifo: %w", err)
}
defer f.Close()

// Block until the FIFO is readable or init has exited.
if err := waitForFifoReady(f, pid); err != nil {
return err
}

// Consume the readiness byte; an empty FIFO is reported as an error.
if err := readFromExecFifo(f); err != nil {
return err
}

// Clean up the FIFO; os.ErrNotExist is tolerated.
if err := os.Remove(f.Name()); err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
return nil
}

// waitForFifoReady blocks until either the FIFO has data available to read,
// or the init process has exited. It does not consume the data — it only
// returns once a subsequent read on f will not block indefinitely.
func waitForFifoReady(f *os.File, pid int) error {
// TODO: switch to os.Process.WithHandle once go < 1.26 is no longer supported.
pidFd, err := unix.PidfdOpen(pid, 0)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use https://pkg.go.dev/os#Process.WithHandle here:

proc, _ := os.FindProcess(pid) // or add process() to parentProcess interface
var readyErr error
err := proc.WithHandle(func(pidfd uintptr) {
   readyErr = waitForReadyPidfd(f, int(pidfd))
})
if err != nil { // pidfd not supported
   return waitForFifoReadyPolling(f, pid)
}

This works better because WithHandle will only call the function if pidfd is fully supported by the kernel (there are many kernels with different nuances and you can rely on the Go runtime check).

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, it's much better.

@captainmo1 captainmo1 May 14, 2026

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os.Process.WithHandle became available in Go 1.26, but runc's go.mod is currently at go 1.25.0. We'll need to bump the version, or I can continue using raw unix.PidfdOpen for now.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a // TODO: switch to os.Process.WithHandle once go < 1.26 is no longer supported. or somesuch.

if err == nil {
defer unix.Close(pidFd)
return waitForFifoReadyPidfd(f, pidFd)
}
// fall through to the polling path.
return waitForFifoReadyPolling(f, pid)
}

// waitForFifoReadyPidfd is the fast path: a single poll(2) on both the
// FIFO and a pidfd, blocking with no timeout. It returns nil once poll
// reports activity on either descriptor, and a wrapped error if poll fails
// with anything other than EINTR.
//
// NOTE(review): the select block below references blockingFifoOpenCh, pid
// and path, none of which are parameters or locals of this function, and its
// braces do not balance with the poll-based code that follows. It appears to
// be the previous implementation interleaved with the new one — confirm
// against the actual file before relying on this listing.
func waitForFifoReadyPidfd(f *os.File, pidFd int) error {
// Watch both descriptors for readability.
pfds := []unix.PollFd{
{Fd: int32(f.Fd()), Events: unix.POLLIN},
{Fd: int32(pidFd), Events: unix.POLLIN},
}
for {
select {
case result := <-blockingFifoOpenCh:
return handleFifoResult(result)

case <-time.After(time.Millisecond * 100):
stat, err := system.Stat(pid)
if err != nil || stat.State == system.Zombie {
// could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check.
// see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete).
if err := handleFifoResult(fifoOpen(path, false)); err != nil {
return errors.New("container process is already dead")
}
return nil
}
// A timeout of -1 makes poll block until an event or a signal arrives.
_, err := unix.Poll(pfds, -1)
// Interrupted by a signal: retry the poll.
if errors.Is(err, unix.EINTR) {
continue
}
if err != nil {
return fmt.Errorf("poll exec fifo: %w", err)
}
// We don't care which FD woke us up. In all cases the next step
// is to read the FIFO: if init wrote the byte (alive or dead),
// it's there to be read; if init died without writing, the read
// returns EOF and readFromExecFifo reports the dead-process error.
return nil
}
}

// waitForFifoReadyPolling is the fallback for kernels without pidfd_open.
// It polls the FIFO with a 100 ms timeout and, whenever the FIFO is not yet
// readable, checks the state of the init process via system.Stat. It returns
// nil once the FIFO reports POLLIN or POLLHUP, or once init can no longer be
// stat'ed or is a zombie; it returns a wrapped error if poll fails with
// anything other than EINTR.
func waitForFifoReadyPolling(f *os.File, pid int) error {
// NOTE(review): on some Go versions os.File.Fd switches the descriptor to
// blocking mode — confirm the later read on f cannot block indefinitely.
pfd := []unix.PollFd{{Fd: int32(f.Fd()), Events: unix.POLLIN}}
const pollIntervalMs = 100
for {
n, err := unix.Poll(pfd, pollIntervalMs)
// Interrupted by a signal: retry the poll.
if errors.Is(err, unix.EINTR) {
continue
}
if err != nil {
return fmt.Errorf("poll exec fifo: %w", err)
}
// Data is available, or the FIFO reported a hang-up.
if n > 0 && pfd[0].Revents&(unix.POLLIN|unix.POLLHUP) != 0 {
return nil
}
// If init is dead, fallthrough and let readFromExecFifo distinguish
// "wrote before dying" from "died before writing".
stat, err := system.Stat(pid)
Comment thread
kolyshkin marked this conversation as resolved.
// A Stat failure is handled the same way as a zombie state.
if err != nil || stat.State == system.Zombie {
return nil
}
}
}

// readFromExecFifo reads the single readiness byte from the exec FIFO.
//
// It returns nil as soon as at least one byte has been read. If the read
// fails with an error other than io.EOF without delivering a byte, that
// error is returned wrapped with the "read exec fifo" prefix. If no byte is
// read (EOF or an empty read), it returns an error stating that the FIFO is
// empty.
func readFromExecFifo(execFifo io.Reader) error {
	var buf [1]byte
	n, err := execFifo.Read(buf[:])
	// Per the io.Reader contract, bytes returned alongside an error must be
	// processed before the error is considered: a delivered byte is a
	// delivered signal, whatever err says.
	if n > 0 {
		return nil
	}
	if err != nil && !errors.Is(err, io.EOF) {
		return fmt.Errorf("read exec fifo: %w", err)
	}
	return errors.New("exec fifo is empty: container init did not signal execve readiness (process died before writing, or fifo already consumed)")
}

func (c *Container) postStart() (retErr error) {
if !c.config.HasHook(configs.Poststart) {
return nil
Expand All @@ -283,59 +361,6 @@ func (c *Container) postStart() (retErr error) {
return c.config.Hooks.Run(configs.Poststart, s)
}

// readFromExecFifo drains execFifo completely. A read failure is returned
// unchanged; if nothing at all was read, it reports that the container is
// already running.
func readFromExecFifo(execFifo io.Reader) error {
	payload, readErr := io.ReadAll(execFifo)
	switch {
	case readErr != nil:
		return readErr
	case len(payload) == 0:
		return errors.New("cannot start an already running container")
	default:
		return nil
	}
}

// awaitFifoOpen starts a blocking open of the FIFO at path in a background
// goroutine and returns a channel on which the single result is delivered.
//
// The channel has a capacity of one so the goroutine can always hand off
// its result and exit once the open returns, even if the caller has stopped
// receiving (for example after taking a timeout path). With an unbuffered
// channel the goroutine would stay blocked on the send forever.
func awaitFifoOpen(path string) <-chan openResult {
	fifoOpened := make(chan openResult, 1)
	go func() {
		fifoOpened <- fifoOpen(path, true)
	}()
	return fifoOpened
}

// fifoOpen opens the FIFO at path read-only and reports the outcome as an
// openResult. When block is false the open is performed with O_NONBLOCK.
// A failure is wrapped with the "exec fifo" prefix.
func fifoOpen(path string, block bool) openResult {
	mode := os.O_RDONLY | unix.O_NONBLOCK
	if block {
		mode = os.O_RDONLY
	}
	fifo, openErr := os.OpenFile(path, mode, 0)
	if openErr == nil {
		return openResult{file: fifo}
	}
	return openResult{err: fmt.Errorf("exec fifo: %w", openErr)}
}

// handleFifoResult finishes the exec-FIFO handshake for an already attempted
// open. An open error is returned as is. Otherwise the file is read via
// readFromExecFifo, closed on return, and removed from the filesystem; a
// file that no longer exists is not treated as an error.
func handleFifoResult(result openResult) error {
	if openErr := result.err; openErr != nil {
		return openErr
	}
	fifo := result.file
	defer fifo.Close()

	if readErr := readFromExecFifo(fifo); readErr != nil {
		return readErr
	}
	if rmErr := os.Remove(fifo.Name()); rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
		return rmErr
	}
	return nil
}

// openResult carries the outcome of a fifoOpen call: either the opened
// file or the error that prevented opening it.
type openResult struct {
// file is the opened FIFO; set by fifoOpen when the open succeeded.
file *os.File
// err is the wrapped open failure; set by fifoOpen when the open failed.
err error
}

func (c *Container) start(process *Process) (retErr error) {
if c.config.Cgroups.Resources.SkipDevices {
return errors.New("can't start container with SkipDevices set")
Expand Down
Loading