diff --git a/internal/lsm/fanotify_backend_linux.go b/internal/lsm/fanotify_backend_linux.go new file mode 100644 index 0000000..1bf310e --- /dev/null +++ b/internal/lsm/fanotify_backend_linux.go @@ -0,0 +1,367 @@ +//go:build linux + +package lsm + +import ( + "bytes" + "encoding/binary" + "fmt" + "os" + "os/signal" + "path/filepath" + "sort" + "strings" + "sync" + "syscall" + "time" + "unsafe" + + "golang.org/x/sys/unix" +) + +const fanotifyTargetRoot = "/proc/1/root" + +type fanotifyBackend struct { + cgroupPath string + logger *SharedLogger + + mu sync.RWMutex + openRules []OpenPolicyRule + execRules []ExecPolicyRule + openDefaultAllow bool + execDefaultAllow bool +} + +func newFanotifyBackend(cgroupPath string, logger *SharedLogger) (*fanotifyBackend, error) { + if strings.TrimSpace(cgroupPath) == "" { + return nil, fmt.Errorf("cgroup path is required") + } + return &fanotifyBackend{ + cgroupPath: cgroupPath, + logger: logger, + }, nil +} + +func (b *fanotifyBackend) UpdatePolicies(policies *PolicySet) error { + if policies == nil { + policies = &PolicySet{} + } + + openRules := ConvertToFileOpenRules(policies.Open) + sort.Slice(openRules, func(i, j int) bool { + return openRules[i].PathLen > openRules[j].PathLen + }) + + execRules := ConvertToExecRules(policies.Exec) + sort.Slice(execRules, func(i, j int) bool { + return execRules[i].PathLen > execRules[j].PathLen + }) + + b.mu.Lock() + defer b.mu.Unlock() + b.openRules = openRules + b.execRules = execRules + b.openDefaultAllow = hasAllowedRootOpen(openRules) + b.execDefaultAllow = hasAllowedRootExec(execRules) + return nil +} + +func (b *fanotifyBackend) Run() error { + fd, err := unix.FanotifyInit( + unix.FAN_CLOEXEC|unix.FAN_CLASS_CONTENT, + unix.O_RDONLY|unix.O_LARGEFILE, + ) + if err != nil { + return fmt.Errorf("fanotify init: %w", err) + } + defer unix.Close(fd) + + if err := unix.FanotifyMark( + fd, + unix.FAN_MARK_ADD|unix.FAN_MARK_MOUNT, + unix.FAN_OPEN_PERM|unix.FAN_OPEN_EXEC_PERM, + unix.AT_FDCWD, + fanotifyTargetRoot, + ); err != nil { + return fmt.Errorf("fanotify mark %s: %w", fanotifyTargetRoot, err) + } + + now := time.Now().Format("15:04:05") + fmt.Printf("time=%s level=info msg=\"Successfully started monitoring file opens and exec via fanotify\"\n", now) + + sigChan := make(chan os.Signal, 1) + signalNotify(sigChan) + defer signalStop(sigChan) + + buf := make([]byte, os.Getpagesize()*4) + pollFDs := []unix.PollFd{{Fd: int32(fd), Events: unix.POLLIN}} + + for { + select { + case <-sigChan: + end := time.Now().Format("15:04:05") + fmt.Printf("time=%s level=info msg=\"Shutting down fanotify tracker\"\n", end) + return nil + default: + } + + n, err := unix.Poll(pollFDs, 100) + if err != nil { + if err == syscall.EINTR { + continue + } + return fmt.Errorf("fanotify poll: %w", err) + } + if n == 0 || pollFDs[0].Revents&unix.POLLIN == 0 { + continue + } + + readN, err := unix.Read(fd, buf) + if err != nil { + if err == syscall.EINTR { + continue + } + return fmt.Errorf("fanotify read: %w", err) + } + if readN == 0 { + continue + } + + if err := b.handleBuffer(fd, buf[:readN]); err != nil { + return err + } + } +} + +func (b *fanotifyBackend) handleBuffer(fanotifyFD int, buf []byte) error { + offset := 0 + metaSize := int(unsafe.Sizeof(unix.FanotifyEventMetadata{})) + for offset+metaSize <= len(buf) { + var meta unix.FanotifyEventMetadata + if err := binary.Read(bytes.NewReader(buf[offset:offset+metaSize]), binary.LittleEndian, &meta); err != nil { + return fmt.Errorf("parse fanotify event metadata: %w", err) + } + if meta.Vers != unix.FANOTIFY_METADATA_VERSION { + return fmt.Errorf("fanotify metadata version mismatch: got %d want %d", meta.Vers, unix.FANOTIFY_METADATA_VERSION) + } + if meta.Event_len < uint32(metaSize) { + return fmt.Errorf("invalid fanotify event length %d", meta.Event_len) + } + + if meta.Fd >= 0 { + b.handleEvent(fanotifyFD, meta) + } + offset += int(meta.Event_len) + } + return nil +} + +func (b *fanotifyBackend) handleEvent(fanotifyFD int, meta unix.FanotifyEventMetadata) { + defer unix.Close(int(meta.Fd)) + + path := readFDPath(int(meta.Fd)) + comm := readProcComm(int(meta.Pid)) + cgroupID := readProcCgroupID(int(meta.Pid)) + timestamp := time.Now().Format(time.RFC3339) + + allowed := true + var logEntry string + + switch { + case meta.Mask&unix.FAN_OPEN_EXEC_PERM != 0: + args := readProcArgs(int(meta.Pid)) + allowed = b.allowExec(path, args) + decision := "allowed" + if !allowed { + decision = "denied" + } + if len(args) > 0 { + logEntry = fmt.Sprintf( + "time=%s event=proc.exec pid=%d cgroup=%d exe=\"%s\" path=\"%s\" argc=%d argv=\"%s\" decision=%s", + timestamp, meta.Pid, cgroupID, comm, path, len(args), strings.Join(args, " "), decision, + ) + } else { + logEntry = fmt.Sprintf( + "time=%s event=proc.exec pid=%d cgroup=%d exe=\"%s\" path=\"%s\" argc=%d decision=%s", + timestamp, meta.Pid, cgroupID, comm, path, len(args), decision, + ) + } + case meta.Mask&unix.FAN_OPEN_PERM != 0: + allowed = b.allowOpen(path) + decision := "allowed" + if !allowed { + decision = "denied" + } + // fanotify open permission events do not expose the original open flags, so the + // fallback reports the generic file.open event name. + logEntry = fmt.Sprintf( + "time=%s event=file.open pid=%d cgroup=%d exe=\"%s\" path=\"%s\" decision=%s", + timestamp, meta.Pid, cgroupID, comm, path, decision, + ) + default: + return + } + + if b.logger != nil { + _ = b.logger.Write(logEntry) + } + + response := unix.FAN_ALLOW + if !allowed { + response = unix.FAN_DENY + } + var out bytes.Buffer + _ = binary.Write(&out, binary.LittleEndian, unix.FanotifyResponse{ + Fd: meta.Fd, + Response: uint32(response), + }) + _, _ = unix.Write(fanotifyFD, out.Bytes()) +} + +func (b *fanotifyBackend) allowOpen(path string) bool { + b.mu.RLock() + defer b.mu.RUnlock() + + for _, rule := range b.openRules { + rulePath := bytesToRulePath(rule.Path[:], int(rule.PathLen)) + if !strings.HasPrefix(path, rulePath) { + continue + } + return rule.Action == PolicyAllow + } + return b.openDefaultAllow +} + +func (b *fanotifyBackend) allowExec(path string, args []string) bool { + b.mu.RLock() + defer b.mu.RUnlock() + + for _, rule := range b.execRules { + rulePath := bytesToRulePath(rule.Path[:], int(rule.PathLen)) + if !strings.HasPrefix(path, rulePath) { + continue + } + if rule.ArgCount == 0 { + return rule.Action == PolicyAllow + } + if rule.Action == PolicyDeny && execArgsMatch(rule, args) { + return false + } + } + return b.execDefaultAllow +} + +func execArgsMatch(rule ExecPolicyRule, args []string) bool { + if len(args) <= 1 { + return false + } + for i := int32(0); i < rule.ArgCount && i < 4; i++ { + needle := string(bytes.TrimRight(rule.Args[i][:rule.ArgLens[i]], "\x00")) + if needle == "" { + continue + } + for _, arg := range args[1:] { + if arg == needle { + return true + } + } + } + return false +} + +func hasAllowedRootOpen(rules []OpenPolicyRule) bool { + for _, rule := range rules { + if rule.Action == PolicyAllow && bytesToRulePath(rule.Path[:], int(rule.PathLen)) == "/" { + return true + } + } + return false +} + +func hasAllowedRootExec(rules []ExecPolicyRule) bool { + for _, rule := range rules { + if rule.Action == PolicyAllow && bytesToRulePath(rule.Path[:], int(rule.PathLen)) == "/" { + return true + } + } + return false +} + +func bytesToRulePath(raw []byte, n int) string { + if n <= 0 || n > len(raw) { + return "" + } + return string(bytes.TrimRight(raw[:n], "\x00")) +} + +func readFDPath(fd int) string { + path, err := os.Readlink(fmt.Sprintf("/proc/self/fd/%d", fd)) + if err != nil { + return "" + } + if resolved, err := filepath.EvalSymlinks(fmt.Sprintf("/proc/self/fd/%d", fd)); err == nil && resolved != "" { + return resolved + } + return path +} + +func readProcComm(pid int) string { + data, err := os.ReadFile(fmt.Sprintf("/proc/%d/comm", pid)) + if err != nil { + return "" + } + return strings.TrimSpace(string(data)) +} + +func readProcArgs(pid int) []string { + data, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid)) + if err != nil { + return nil + } + parts := strings.Split(string(bytes.TrimRight(data, "\x00")), "\x00") + out := make([]string, 0, len(parts)) + for _, part := range parts { + part = strings.TrimSpace(part) + if part != "" { + out = append(out, part) + } + } + return out +} + +func readProcCgroupID(pid int) uint64 { + data, err := os.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + if err != nil { + return 0 + } + lines := strings.Split(string(data), "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" { + continue + } + parts := strings.Split(line, ":") + raw := parts[len(parts)-1] + raw = strings.TrimSpace(raw) + if raw == "" || raw == "/" || raw == "." { + continue + } + if !strings.HasPrefix(raw, "/") { + raw = "/" + raw + } + path := filepath.Clean(filepath.Join("/sys/fs/cgroup", strings.TrimPrefix(raw, "/"))) + id, err := getCgroupID(path) + if err == nil { + return id + } + } + return 0 +} + +func signalNotify(ch chan<- os.Signal) { + signal.Notify(ch, os.Interrupt, syscall.SIGTERM) +} + +func signalStop(ch chan<- os.Signal) { + signal.Stop(ch) +} diff --git a/internal/lsm/fanotify_backend_linux_test.go b/internal/lsm/fanotify_backend_linux_test.go new file mode 100644 index 0000000..a55705a --- /dev/null +++ b/internal/lsm/fanotify_backend_linux_test.go @@ -0,0 +1,62 @@ +//go:build linux + +package lsm + +import "testing" + +func TestFanotifyBackendAllowOpen(t *testing.T) { + backend, err := newFanotifyBackend("/sys/fs/cgroup/test", nil) + if err != nil { + t.Fatalf("newFanotifyBackend: %v", err) + } + + policies := &PolicySet{ + Open: []PolicyRule{ + mustPolicyRule(t, "deny file.open /"), + mustPolicyRule(t, "allow file.open /workspace/project"), + }, + } + if err := backend.UpdatePolicies(policies); err != nil { + t.Fatalf("UpdatePolicies: %v", err) + } + + if !backend.allowOpen("/workspace/project/README.md") { + t.Fatalf("expected allow for matching allow rule") + } + if backend.allowOpen("/etc/passwd") { + t.Fatalf("expected deny from root default rule") + } +} + +func TestFanotifyBackendAllowExecArgumentBlacklist(t *testing.T) { + backend, err := newFanotifyBackend("/sys/fs/cgroup/test", nil) + if err != nil { + t.Fatalf("newFanotifyBackend: %v", err) + } + + policies := &PolicySet{ + Exec: []PolicyRule{ + mustPolicyRule(t, "allow proc.exec /usr/bin/"), + mustPolicyRule(t, "deny proc.exec /usr/bin/curl --insecure"), + }, + } + if err := backend.UpdatePolicies(policies); err != nil { + t.Fatalf("UpdatePolicies: %v", err) + } + + if !backend.allowExec("/usr/bin/curl", []string{"curl", "--silent"}) { + t.Fatalf("expected allow for non-blacklisted args") + } + if backend.allowExec("/usr/bin/curl", []string{"curl", "--insecure"}) { + t.Fatalf("expected deny for blacklisted arg") + } +} + +func mustPolicyRule(t *testing.T, line string) PolicyRule { + t.Helper() + rule, err := ParseRuleString(line) + if err != nil { + t.Fatalf("ParseRuleString(%q): %v", line, err) + } + return *rule +} diff --git a/internal/lsm/fanotify_backend_stub.go b/internal/lsm/fanotify_backend_stub.go new file mode 100644 index 0000000..b31bcc6 --- /dev/null +++ b/internal/lsm/fanotify_backend_stub.go @@ -0,0 +1,15 @@ +//go:build !linux + +package lsm + +import "fmt" + +type fanotifyBackend struct{} + +func newFanotifyBackend(_ string, _ *SharedLogger) (*fanotifyBackend, error) { + return nil, fmt.Errorf("fanotify backend is only supported on linux") +} + +func (b *fanotifyBackend) UpdatePolicies(_ *PolicySet) error { return nil } + +func (b *fanotifyBackend) Run() error { return nil } diff --git a/internal/lsm/manager.go b/internal/lsm/manager.go index 3649710..2997287 100644 --- a/internal/lsm/manager.go +++ b/internal/lsm/manager.go @@ -6,10 +6,24 @@ import ( "fmt" "os" "os/signal" + "runtime" + "strings" "sync" "syscall" ) +type enforcementBackend interface { + UpdatePolicies(*PolicySet) error + Run() error +} + +type backendKind string + +const ( + backendBPF backendKind = "bpf" + backendFanotify backendKind = "fanotify" +) + // LSMManager manages multiple LSM programs and handles policy reloading type LSMManager struct { cgroupPath string @@ -21,6 +35,10 @@ type LSMManager struct { connectLsm *ConnectLsm reloadMutex sync.RWMutex + started bool + backend enforcementBackend + backendKind backendKind + policies *PolicySet } func NewLSMManager(cgroupPath string, logger *SharedLogger) *LSMManager { @@ -31,6 +49,28 @@ func NewLSMManager(cgroupPath string, logger *SharedLogger) *LSMManager { } func (m *LSMManager) LoadAndStart() error { + m.reloadMutex.Lock() + defer m.reloadMutex.Unlock() + + if !m.started { + backend, kind, err := m.initializeBackendLocked() + if err != nil { + return err + } + m.backend = backend + m.backendKind = kind + m.started = true + if m.policies != nil { + if err := m.backend.UpdatePolicies(clonePolicySet(m.policies)); err != nil { + return err + } + } + } + + if m.backendKind == backendFanotify { + return m.backend.Run() + } + // Set up signal handling for graceful shutdown sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) @@ -46,6 +86,32 @@ func (m *LSMManager) LoadAndStart() error { return nil } +func (m *LSMManager) initializeBackendLocked() (enforcementBackend, backendKind, error) { + if runtime.GOOS != "linux" { + return nil, "", fmt.Errorf("LSM manager is only supported on linux") + } + + if hostSupportsBPFLSM() { + fmt.Printf("Using BPF LSM enforcement backend\n") + return newBPFBackend(m), backendBPF, nil + } + + backend, err := newFanotifyBackend(m.cgroupPath, m.logger) + if err != nil { + return nil, "", err + } + fmt.Printf("Using fanotify enforcement backend\n") + return backend, backendFanotify, nil +} + +func hostSupportsBPFLSM() bool { + data, err := os.ReadFile("/sys/kernel/security/lsm") + if err != nil { + return false + } + return strings.Contains(strings.TrimSpace(string(data)), "bpf") +} + func (m *LSMManager) updateOpenLSM(policies *PolicySet) error { if !policies.HasOpenPolicies() { // No open policies, ensure LSM is stopped @@ -158,14 +224,46 @@ func (m *LSMManager) UpdateRuntimeRules(policies *PolicySet) error { m.reloadMutex.Lock() defer m.reloadMutex.Unlock() - if err := m.updateOpenLSM(policies); err != nil { + m.policies = clonePolicySet(policies) + if !m.started { + return nil + } + return m.backend.UpdatePolicies(clonePolicySet(policies)) +} + +func clonePolicySet(src *PolicySet) *PolicySet { + if src == nil { + return &PolicySet{} + } + return &PolicySet{ + Open: append([]PolicyRule(nil), src.Open...), + Exec: append([]PolicyRule(nil), src.Exec...), + Connect: append([]PolicyRule(nil), src.Connect...), + MCP: append([]MCPPolicyRule(nil), src.MCP...), + ConnectDefaultAllow: src.ConnectDefaultAllow, + ConnectDefaultExplicit: src.ConnectDefaultExplicit, + } +} + +type bpfBackend struct { + manager *LSMManager +} + +func newBPFBackend(manager *LSMManager) *bpfBackend { + return &bpfBackend{manager: manager} +} + +func (b *bpfBackend) UpdatePolicies(policies *PolicySet) error { + if err := b.manager.updateOpenLSM(policies); err != nil { return err } - if err := m.updateExecLSM(policies); err != nil { + if err := b.manager.updateExecLSM(policies); err != nil { return err } - if err := m.updateConnectLSM(policies); err != nil { + if err := b.manager.updateConnectLSM(policies); err != nil { return err } return nil } + +func (b *bpfBackend) Run() error { return nil } diff --git a/internal/runner/runner.go b/internal/runner/runner.go index d8726e1..4026e90 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -2092,6 +2092,7 @@ func (r *runner) launchLeashContainer(ctx context.Context, cgroupPath string) er "--privileged", "--cap-add", "NET_ADMIN", "--cgroupns=host", // Use host cgroup namespace for iptables cgroup matching + "--pid", fmt.Sprintf("container:%s", r.cfg.targetContainer), "--network", fmt.Sprintf("container:%s", r.cfg.targetContainer), "-v", "/sys/fs/cgroup:/sys/fs/cgroup:ro", "-v", r.internalBindMountSpec(r.cfg.logDir, "/log", ""),