Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions runner/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ require (
github.com/urfave/cli/v3 v3.6.1
golang.org/x/crypto v0.22.0
golang.org/x/sys v0.26.0
kernel.org/pub/linux/libs/security/libcap/cap v1.2.77
)

require (
Expand Down Expand Up @@ -84,4 +85,5 @@ require (
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gotest.tools/v3 v3.5.1 // indirect
kernel.org/pub/linux/libs/security/libcap/psx v1.2.77 // indirect
)
4 changes: 4 additions & 0 deletions runner/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,7 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU=
gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.77 h1:iQtQTjFUOcTT19fI8sTCzYXsjeVs56et3D8AbKS2Uks=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.77/go.mod h1:oV+IO8kGh0B7TxErbydDe2+BRmi9g/W0CkpVV+QBTJU=
kernel.org/pub/linux/libs/security/libcap/psx v1.2.77 h1:Z06sMOzc0GNCwp6efaVrIrz4ywGJ1v+DP0pjVkOfDuA=
kernel.org/pub/linux/libs/security/libcap/psx v1.2.77/go.mod h1:+l6Ee2F59XiJ2I6WR5ObpC1utCQJZ/VLsEbQCD8RG24=
34 changes: 23 additions & 11 deletions runner/internal/executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/dstackai/dstack/runner/consts"
"github.com/dstackai/dstack/runner/internal/common"
"github.com/dstackai/dstack/runner/internal/connections"
cap "github.com/dstackai/dstack/runner/internal/linux/capabilities"
linuxuser "github.com/dstackai/dstack/runner/internal/linux/user"
"github.com/dstackai/dstack/runner/internal/log"
"github.com/dstackai/dstack/runner/internal/schemas"
Expand Down Expand Up @@ -467,10 +468,19 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error
}
cmd.Dir = ex.jobWorkingDir

// Strictly speaking, we need CAP_SETUID and CAP_SETGID (for Cmd.Start()->
// Cmd.SysProcAttr.Credential) and CAP_CHOWN (for startCommand()->os.Chown()),
// but for the sake of simplicity we instead check if we are root or not
if ex.currentUser.IsRoot() {
// CAP_SET{UID,GID} for startCommand() -> Cmd.Start() -> set{uid,gid,groups} syscalls during fork-exec
// CAP_CHOWN for startCommand() -> os.Chown(pts.Name())
if missing, err := cap.Check(cap.SETUID, cap.SETGID, cap.CHOWN); err != nil {
log.Error(
ctx, "Failed to check capabilities, won't try to set process credentials",
"err", err, "user", ex.currentUser,
)
} else if len(missing) > 0 {
log.Info(
ctx, "Required capabilities are missing, cannot set process credentials",
"missing", missing, "user", ex.currentUser,
)
} else {
log.Trace(ctx, "Using credentials", "user", ex.jobUser)
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
Expand All @@ -480,8 +490,6 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error
return fmt.Errorf("prepare process credentials: %w", err)
}
cmd.SysProcAttr.Credential = creds
} else {
log.Info(ctx, "Current user is not root, cannot set process credentials", "user", ex.currentUser)
}

envMap := NewEnvMap(ParseEnvList(os.Environ()), jobEnvs, ex.secrets)
Expand Down Expand Up @@ -509,11 +517,15 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error
// Note: we already set RLIMIT_MEMLOCK to unlimited in the shim if we've detected IB devices
// (see configureHpcNetworkingIfAvailable() function), but, as it's on the shim side, it only works
// with VM-based backends.
rlimitMemlock := unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY}
// TODO: Check if we have CAP_SYS_RESOURCE. In container environments, even root usually doesn't have
// this capability.
if err := unix.Setrlimit(unix.RLIMIT_MEMLOCK, &rlimitMemlock); err != nil {
log.Error(ctx, "Failed to set resource limits", "err", err)
if ok, err := cap.Has(cap.SYS_RESOURCE); err != nil {
log.Error(ctx, "Failed to check capabilities, won't try to set resource limits", "err", err)
} else if !ok {
log.Info(ctx, "Required capability is missing, cannot set resource limits", "missing", cap.SYS_RESOURCE)
} else {
rlimitMemlock := unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY}
if err := unix.Setrlimit(unix.RLIMIT_MEMLOCK, &rlimitMemlock); err != nil {
log.Error(ctx, "Failed to set resource limits", "err", err)
}
}

// HOME must be added after writeDstackProfile to avoid overriding the correct per-user value set by sshd
Expand Down
22 changes: 22 additions & 0 deletions runner/internal/linux/capabilities/capabilities_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//go:build darwin

package capabilities

import "errors"

// Capability names a capability. In the darwin build it is a plain string
// holding the upper-case name without the CAP_ prefix.
type Capability string

// Capability names exposed by this package. On darwin they are placeholders
// only: Has and Check always report "not supported".
const (
	SETUID       Capability = "SETUID"
	SETGID       Capability = "SETGID"
	CHOWN        Capability = "CHOWN"
	SYS_RESOURCE Capability = "SYS_RESOURCE"
)

// Has is the darwin stub of the capability query. It does not look at c and
// always returns false together with a "not supported" error.
func Has(c Capability) (bool, error) {
	err := errors.New("not supported")
	return false, err
}

// Check is the darwin stub of the bulk capability query. It ignores cs and
// always returns a nil slice together with a "not supported" error.
func Check(cs ...Capability) (missing []Capability, err error) {
	err = errors.New("not supported")
	return nil, err
}
52 changes: 52 additions & 0 deletions runner/internal/linux/capabilities/capabilities_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//go:build linux

package capabilities

import (
"strings"

"kernel.org/pub/linux/libs/security/libcap/cap"
)

type Capability cap.Value

const (
SETUID = Capability(cap.SETUID)
SETGID = Capability(cap.SETGID)
CHOWN = Capability(cap.CHOWN)
SYS_RESOURCE = Capability(cap.SYS_RESOURCE)
)

// String returns a text representation of the capability in the form used by container folks:
// UPPER_CASE, no CAP_ prefix: cap_sys_admin -> SYS_ADMIN.
//
// The prefix is stripped with strings.TrimPrefix instead of a fixed [4:] slice, so a name
// that is shorter than four bytes or does not start with "cap_" is upper-cased and returned
// whole rather than causing a slice-bounds panic or being truncated.
// NOTE(review): libcap appears to render unnamed values as plain decimal numbers, which is
// the case this guards against — confirm against the cap.Value.String documentation.
func (c Capability) String() string {
	name := cap.Value(c).String()
	return strings.ToUpper(strings.TrimPrefix(name, "cap_"))
}

// Has reports whether capability c is raised in the effective set obtained
// from cap.GetPID(0). A failure to read the set, or to read the flag, is
// returned as the error.
func Has(c Capability) (bool, error) {
	procCaps, err := cap.GetPID(0)
	if err == nil {
		return procCaps.GetFlag(cap.Effective, cap.Value(c))
	}
	return false, err
}

// Check looks up every capability in cs in the effective set obtained from
// cap.GetPID(0) and returns the ones that are _not_ raised, in input order.
// If the set or any flag cannot be read, it returns a nil slice and the error.
func Check(cs ...Capability) (missing []Capability, err error) {
	procCaps, err := cap.GetPID(0)
	if err != nil {
		return nil, err
	}
	for i := range cs {
		present, flagErr := procCaps.GetFlag(cap.Effective, cap.Value(cs[i]))
		switch {
		case flagErr != nil:
			return nil, flagErr
		case !present:
			missing = append(missing, cs[i])
		}
	}
	return missing, nil
}