Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
5ee3058
Add SQLite state store for persistent resources
danegsta May 6, 2026
d5ad5eb
Improve persistent executable lifecycle handling
danegsta May 7, 2026
a47d849
Preserve process identity times in state store
danegsta May 7, 2026
83a06cc
Update dependency notices
danegsta May 7, 2026
a84e815
Use structured executable lifecycle key hashing
danegsta May 7, 2026
0f84172
Simplify container startup retry handling
danegsta May 7, 2026
4d4c9f4
Refactor container startup orchestration
danegsta May 7, 2026
7d1a045
Handle persistent executable adoption before start gating
danegsta May 7, 2026
1008884
Simplify resource lease persistence
danegsta May 7, 2026
9d536e6
Require leasable resources for resource leases
danegsta May 7, 2026
4aa2cea
Clarify persistence API documentation
danegsta May 7, 2026
2d9f4bd
Describe executable persistence as feature docs
danegsta May 7, 2026
6a37ae0
Use resource leases for persistent executables
danegsta May 7, 2026
c756d12
Extract executable persistence helpers
danegsta May 7, 2026
eabc3c9
Clarify resource lease contention logs
danegsta May 7, 2026
ffe378a
Preserve reusable persistent executable records
danegsta May 8, 2026
1b434ab
Configure state store WAL checkpointing
danegsta May 8, 2026
4dc1368
Use executable spec lifecycle keys for persistence
danegsta May 8, 2026
11b2df7
Require lease ownership on release
danegsta May 8, 2026
576a428
Use effective executable lifecycle inputs
danegsta May 8, 2026
2006624
Store process PID for adopted runs
danegsta May 8, 2026
16411de
Guard persistent container setup with leases
danegsta May 8, 2026
833e133
Flush shutdown response before stopping apiserver
danegsta May 8, 2026
c9d9ceb
Gate persistent container startup with leases
danegsta May 8, 2026
85c1cb9
Merge remote-tracking branch 'origin/main' into danegsta-microsoft/dr…
danegsta May 8, 2026
5fbf62f
Close state store migration runner
danegsta May 8, 2026
994beda
Align persistent executable lease handling
danegsta May 8, 2026
c14d669
Fix persistent container lease retries
danegsta May 8, 2026
b4da77f
Format process identity values safely
danegsta May 8, 2026
fa3ca8c
Harden persistent resource cleanup
danegsta May 8, 2026
b450c3b
Simplify state store file setup
danegsta May 8, 2026
dae4b30
Trim persistent state metadata
danegsta May 8, 2026
4fa4940
Clarify lease owner identity logging
danegsta May 8, 2026
0350a48
Persist executable output files
danegsta May 8, 2026
0f30fcd
Skip timestamp pipe for persistent output
danegsta May 8, 2026
da9c310
Release persistent container lease after reuse
danegsta May 8, 2026
40c126f
Harden persistent output directory setup
danegsta May 8, 2026
25abaea
Format io platform files
danegsta May 8, 2026
8f32f5c
Simplify persistent process PID check
danegsta May 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,007 changes: 1,007 additions & 0 deletions NOTICE

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ DCP has knowledge of a number of environment variables that can change its behav
| `DCP_EXTENSIONS_PATH` | Points to directory that contains DCP extensions. By default extensions are placed in the `ext` sub-directory of the directory where DCP main executable is located. |
| `DEBUG_SESSION_PORT`, `DEBUG_SESSION_TOKEN`, and `DEBUG_SESSION_SERVER_CERTIFICATE` | These are variables that configure the endpoint for running Executables via a developer IDE/under debugger. For more information see [IDE execution specification](https://github.com/dotnet/aspire/blob/main/docs/specs/IDE-execution.md). |
| `DCP_SESSION_FOLDER` | This variable is used for isolating multiple DCP instances running concurrently on the same machine. If set (to a valid filesystem folder), DCP process(es) will create files related to their execution in this folder: the access configuration file (kubeconfig), captured Executable/Container logs, etc. |
| `DCP_STATE_STORE_PATH` | Overrides the path to the local SQLite state store used for DCP coordination metadata such as process records and resource leases. Resource leases are owned by the leasing DCP process identity and stale leases are cleaned up on startup. If unset, DCP uses the default per-user state store under the DCP user directory. |
| `DCP_LOG_SOCKET` | If set to a Unix domain socket, DCP will write its execution logs to that socket instead of writing them to standard error stream (`stderr`). This allows programs that launch DCP to capture its output even if DCP is running in `--detach` mode. <br/> The `--detach` mode causes DCP to fork itself and break the parent-child relationship (and lifetime dependency) from the process that launched it, but the side effect of doing so is that the parent process loses ability to monitor DCP standard output and standard error streamd. |
| `DCP_LOG_SESSION_ID` | If set, DCP will prepend this value to all diagnostics log names. If unset, a session ID will be calculated. The value is propagated to all child DCP processes. |
| `DCP_DIAGNOSTICS_LOG_LEVEL` | If set, enabled DCP diagnostic logging. <br/> Can be set to `error`, `info`, or `debug`; for troubleshooting `debug` is recommended, although it results in the most verbose output. |
Expand Down
8 changes: 8 additions & 0 deletions api/v1/container_network_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ package v1

import (
"context"
"fmt"
"strings"

"github.com/microsoft/dcp/internal/statestore"
"github.com/microsoft/dcp/pkg/commonapi"
apiserver_resource "github.com/tilt-dev/tilt-apiserver/pkg/server/builder/resource"
apiserver_resourcerest "github.com/tilt-dev/tilt-apiserver/pkg/server/builder/resource/resourcerest"
Expand Down Expand Up @@ -107,6 +110,10 @@ func (cn *ContainerNetwork) GetGroupVersionResource() schema.GroupVersionResourc
}
}

func (cn *ContainerNetwork) GetLeaseKey() string {
return fmt.Sprintf("%s/%s", cn.GetGroupVersionResource().Resource, strings.TrimSpace(cn.Spec.NetworkName))
}

func (cn *ContainerNetwork) GetObjectMeta() *metav1.ObjectMeta {
return &cn.ObjectMeta
}
Expand Down Expand Up @@ -214,3 +221,4 @@ var _ apiserver_resource.StatusSubResource = (*ContainerNetworkStatus)(nil)
var _ apiserver_resourcerest.ShortNamesProvider = (*ContainerNetwork)(nil)
var _ apiserver_resourcestrategy.Validater = (*ContainerNetwork)(nil)
var _ apiserver_resourcestrategy.ValidateUpdater = (*ContainerNetwork)(nil)
var _ statestore.LeasableResource = (*ContainerNetwork)(nil)
Comment thread
danegsta marked this conversation as resolved.
6 changes: 6 additions & 0 deletions api/v1/container_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
apiserver_resourcerest "github.com/tilt-dev/tilt-apiserver/pkg/server/builder/resource/resourcerest"
apiserver_resourcestrategy "github.com/tilt-dev/tilt-apiserver/pkg/server/builder/resource/resourcestrategy"

"github.com/microsoft/dcp/internal/statestore"
"github.com/microsoft/dcp/pkg/commonapi"
"github.com/microsoft/dcp/pkg/pointers"
)
Expand Down Expand Up @@ -1175,6 +1176,10 @@ func (c *Container) GetGroupVersionResource() schema.GroupVersionResource {
}
}

func (c *Container) GetLeaseKey() string {
return fmt.Sprintf("%s/%s", c.GetGroupVersionResource().Resource, strings.TrimSpace(c.Spec.ContainerName))
}

func (c *Container) GetObjectMeta() *metav1.ObjectMeta {
return &c.ObjectMeta
}
Expand Down Expand Up @@ -1550,3 +1555,4 @@ var _ apiserver_resourcestrategy.Validater = (*Container)(nil)
var _ apiserver_resourcestrategy.ValidateUpdater = (*Container)(nil)
var _ apiserver_resource.ObjectWithGenericSubResource = (*Container)(nil)
var _ apiserver_resource.GenericSubResource = (*ContainerLogResource)(nil)
var _ statestore.LeasableResource = (*Container)(nil)
Comment thread
danegsta marked this conversation as resolved.
18 changes: 18 additions & 0 deletions api/v1/container_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,24 @@ import (
"k8s.io/apimachinery/pkg/util/validation/field"
)

func TestContainerGetLeaseKey(t *testing.T) {
t.Parallel()

container := &Container{}
container.Spec.ContainerName = " api "

require.Equal(t, "containers/api", container.GetLeaseKey())
}

func TestContainerNetworkGetLeaseKey(t *testing.T) {
t.Parallel()

network := &ContainerNetwork{}
network.Spec.NetworkName = " app-network "

require.Equal(t, "containernetworks/app-network", network.GetLeaseKey())
}

func TestImageLayerValidate(t *testing.T) {
t.Parallel()

Expand Down
214 changes: 214 additions & 0 deletions api/v1/executable_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@ package v1

import (
"context"
"encoding/gob"
"errors"
"fmt"
"hash/fnv"
stdmaps "maps"
"os"
stdslices "slices"
"strings"

"github.com/joho/godotenv"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
Expand All @@ -24,7 +30,11 @@ import (
apiserver_resourcerest "github.com/tilt-dev/tilt-apiserver/pkg/server/builder/resource/resourcerest"
apiserver_resourcestrategy "github.com/tilt-dev/tilt-apiserver/pkg/server/builder/resource/resourcestrategy"

"github.com/microsoft/dcp/internal/statestore"
"github.com/microsoft/dcp/pkg/commonapi"
usvc_maps "github.com/microsoft/dcp/pkg/maps"
"github.com/microsoft/dcp/pkg/osutil"
"github.com/microsoft/dcp/pkg/pointers"
"github.com/microsoft/dcp/pkg/slices"
)

Expand Down Expand Up @@ -250,10 +260,21 @@ type ExecutableSpec struct {
// Controls behavior of environment variables inherited from the controller process.
AmbientEnvironment AmbientEnvironment `json:"ambientEnvironment,omitempty"`

// Should the controller attempt to start the Executable?
// +kubebuilder:default:=true
Start *bool `json:"start,omitempty"`

// Should the controller attempt to stop the Executable
// +kubebuilder:default:=false
Stop bool `json:"stop,omitempty"`

// Should this Executable be created and persisted between DCP runs?
Persistent bool `json:"persistent,omitempty"`

// Optional key used to identify if an existing persistent Executable process should be reused.
// If not set, the controller will calculate a key based on a hash of specific fields in the ExecutableSpec.
LifecycleKey string `json:"lifecycleKey,omitempty"`

// Health probe configuration for the Executable
// +listType=atomic
HealthProbes []HealthProbe `json:"healthProbes,omitempty"`
Expand Down Expand Up @@ -296,10 +317,22 @@ func (es ExecutableSpec) Equal(other ExecutableSpec) bool {
return false
}

if pointers.GetValueOrDefault(es.Start, true) != pointers.GetValueOrDefault(other.Start, true) {
return false
}

if es.Stop != other.Stop {
return false
}

if es.Persistent != other.Persistent {
return false
}

if es.LifecycleKey != other.LifecycleKey {
return false
}

if len(es.HealthProbes) != len(other.HealthProbes) {
return false
}
Expand All @@ -317,6 +350,159 @@ func (es ExecutableSpec) Equal(other ExecutableSpec) bool {
return true
}

func (es *ExecutableSpec) GetLifecycleKey() (string, bool, error) {
if es.LifecycleKey != "" {
return es.LifecycleKey, false, nil
}

fnvHash := fnv.New128()
encoder := gob.NewEncoder(fnvHash)

var hashErr error
hashErr = errors.Join(hashErr, encoder.Encode(es.ExecutablePath))
hashErr = errors.Join(hashErr, encoder.Encode(es.WorkingDirectory))
hashErr = errors.Join(hashErr, encoder.Encode(string(es.ExecutionType)))
hashErr = errors.Join(hashErr, encoder.Encode(string(es.AmbientEnvironment.Behavior)))
hashErr = errors.Join(hashErr, encoder.Encode(es.Args))

Comment thread
danegsta marked this conversation as resolved.
if len(es.Env) > 0 {
sortedEnv := stdslices.Clone(es.Env)
stdslices.SortFunc(sortedEnv, func(e1, e2 EnvVar) int {
return strings.Compare(e1.Name, e2.Name)
})

for i := range sortedEnv {
hashErr = errors.Join(hashErr, encoder.Encode(sortedEnv[i]))
}
}

if len(es.EnvFiles) > 0 {
sortedEnvFiles := stdslices.Clone(es.EnvFiles)
stdslices.Sort(sortedEnvFiles)

for i := range sortedEnvFiles {
envFileContents, envFileReadErr := os.ReadFile(sortedEnvFiles[i])
if envFileReadErr != nil {
hashErr = errors.Join(hashErr, envFileReadErr)
} else {
hashErr = errors.Join(hashErr, encoder.Encode(envFileContents))
}
}
}

if es.PemCertificates != nil {
sortedPemCertificates := stdslices.Clone(es.PemCertificates.Certificates)
stdslices.SortFunc(sortedPemCertificates, func(c1, c2 PemCertificate) int {
return strings.Compare(c1.Thumbprint, c2.Thumbprint)
})

for i := range sortedPemCertificates {
hashErr = errors.Join(hashErr, encoder.Encode(sortedPemCertificates[i]))
}
hashErr = errors.Join(hashErr, encoder.Encode(es.PemCertificates.ContinueOnError))
}

lifecycleKey := fmt.Sprintf("%x", fnvHash.Sum(nil))
return lifecycleKey, true, hashErr
}

func (e *Executable) GetLifecycleKey() (string, bool, error) {
lifecycleSpec, lifecycleSpecErr := e.EffectiveLifecycleSpec()
if lifecycleSpecErr != nil {
return "", false, lifecycleSpecErr
}
return lifecycleSpec.GetLifecycleKey()
}

func (e *Executable) EffectiveLifecycleSpec() (ExecutableSpec, error) {
lifecycleSpec := *e.Spec.DeepCopy()
effectiveArgs, effectiveArgsErr := effectiveLifecycleArgs(e)
if effectiveArgsErr != nil {
return ExecutableSpec{}, effectiveArgsErr
}
explicitEffectiveEnv, explicitEffectiveEnvErr := explicitEffectiveLifecycleEnv(e)
if explicitEffectiveEnvErr != nil {
return ExecutableSpec{}, explicitEffectiveEnvErr
}
lifecycleSpec.Args = effectiveArgs
lifecycleSpec.Env = explicitEffectiveEnv
lifecycleSpec.EnvFiles = nil
return lifecycleSpec, nil
}

func effectiveLifecycleArgs(e *Executable) ([]string, error) {
if len(e.Status.EffectiveArgs) == 0 {
if len(e.Spec.Args) > 0 {
return nil, fmt.Errorf("executable lifecycle key cannot be calculated before effective arguments are computed")
}
return nil, nil
}
return stdslices.Clone(e.Status.EffectiveArgs), nil
}

func explicitEffectiveLifecycleEnv(e *Executable) ([]EnvVar, error) {
explicitNames := explicitLifecycleEnvNames(e)
if explicitNames.Len() == 0 {
return nil, nil
}

effectiveEnvByName := lifecycleEnvMap()
for _, envVar := range e.Status.EffectiveEnv {
effectiveEnvByName.Set(envVar.Name, envVar.Value)
}
if effectiveEnvByName.Len() == 0 {
return nil, fmt.Errorf("executable lifecycle key cannot be calculated before effective environment is computed")
}

explicitEffectiveEnv := make([]EnvVar, 0, explicitNames.Len())
for nameKey, name := range explicitNames.Data() {
value, found := effectiveEnvByName.Get(nameKey)
if !found {
continue
}
explicitEffectiveEnv = append(explicitEffectiveEnv, EnvVar{Name: name, Value: value})
}
stdslices.SortFunc(explicitEffectiveEnv, func(e1, e2 EnvVar) int {
return strings.Compare(lifecycleEnvKey(e1.Name), lifecycleEnvKey(e2.Name))
})
return explicitEffectiveEnv, nil
}

func explicitLifecycleEnvNames(e *Executable) usvc_maps.StringKeyMap[string] {
explicitNames := lifecycleEnvMap()
addExplicitName := func(name string) {
if name != "" {
explicitNames.Set(name, name)
}
}

if len(e.Spec.EnvFiles) > 0 {
if fileEnv, readErr := godotenv.Read(e.Spec.EnvFiles...); readErr == nil {
for name := range fileEnv {
addExplicitName(name)
}
}
}
for _, envVar := range e.Spec.Env {
addExplicitName(envVar.Name)
}
return explicitNames
}

func lifecycleEnvMap() usvc_maps.StringKeyMap[string] {
if osutil.IsWindows() {
return usvc_maps.NewStringKeyMap[string](usvc_maps.StringMapModeCaseInsensitive)
}
return usvc_maps.NewStringKeyMap[string](usvc_maps.StringMapModeCaseSensitive)
}

func lifecycleEnvKey(name string) string {
if osutil.IsWindows() {
return strings.ToUpper(name)
}
return name
}

func (es ExecutableSpec) Validate(specPath *field.Path) field.ErrorList {
errorList := field.ErrorList{}

Expand Down Expand Up @@ -348,6 +534,17 @@ func (es ExecutableSpec) Validate(specPath *field.Path) field.ErrorList {
errorList = append(errorList, field.Invalid(specPath.Child("ambientEnvironment", "behavior"), es.AmbientEnvironment.Behavior, "Ambient environment behavior must be either Inherit or DoNotInherit."))
}

effectiveExecutionType := es.ExecutionType
if effectiveExecutionType == "" {
effectiveExecutionType = ExecutionTypeProcess
}
if es.Persistent && effectiveExecutionType != ExecutionTypeProcess {
errorList = append(errorList, field.Invalid(specPath.Child("persistent"), es.Persistent, "Persistent Executables only support Process execution type."))
}
if es.Persistent && len(es.FallbackExecutionTypes) > 0 {
errorList = append(errorList, field.Invalid(specPath.Child("fallbackExecutionTypes"), es.FallbackExecutionTypes, "Persistent Executables cannot use fallback execution types."))
}

healthProbesPath := specPath.Child("healthProbes")
for i, probe := range es.HealthProbes {
errorList = append(errorList, probe.Validate(healthProbesPath.Index(i))...)
Expand Down Expand Up @@ -453,6 +650,10 @@ func (e *Executable) GetResourceId() string {
return fmt.Sprintf("executable-%s", e.UID)
}

func (e *Executable) GetLeaseKey() string {
return e.NamespacedName().String()
}

func (e *Executable) GetGroupVersionResource() schema.GroupVersionResource {
return schema.GroupVersionResource{
Group: GroupVersion.Group,
Expand Down Expand Up @@ -517,10 +718,18 @@ func (e *Executable) ValidateUpdate(ctx context.Context, obj runtime.Object) fie
errorList := field.ErrorList{}

oldExe := obj.(*Executable)
if (oldExe.Spec.Start == nil || *oldExe.Spec.Start) && (e.Spec.Start != nil && !*e.Spec.Start) {
errorList = append(errorList, field.Forbidden(field.NewPath("spec", "start"), "Cannot set start to false after Executable creation."))
}

if oldExe.Spec.Stop && e.Spec.Stop != oldExe.Spec.Stop {
errorList = append(errorList, field.Forbidden(field.NewPath("spec", "stop"), "Cannot unset stop property once it is set."))
}

if oldExe.Spec.Persistent != e.Spec.Persistent {
errorList = append(errorList, field.Forbidden(field.NewPath("spec", "persistent"), "persistent cannot be changed"))
}

if oldExe.Spec.AmbientEnvironment.Behavior != e.Spec.AmbientEnvironment.Behavior {
errorList = append(errorList, field.Forbidden(field.NewPath("spec", "ambientEnvironment", "behavior"), "Cannot change ambient environment behavior once it is set."))
}
Expand All @@ -547,6 +756,10 @@ func (e *Executable) Done() bool {
return !e.Status.FinishTimestamp.IsZero()
}

func (e *Executable) ShouldStart() bool {
return e.Spec.Start == nil || *e.Spec.Start
}

func (*Executable) GenericSubResources() []apiserver_resource.GenericSubResource {
return []apiserver_resource.GenericSubResource{
&ExecutableLogResource{},
Expand Down Expand Up @@ -630,3 +843,4 @@ var _ apiserver_resourcestrategy.ValidateUpdater = (*Executable)(nil)
var _ apiserver_resource.ObjectWithGenericSubResource = (*Executable)(nil)
var _ apiserver_resource.GenericSubResource = (*ExecutableLogResource)(nil)
var _ StdIoStreamableResource = (*Executable)(nil)
var _ statestore.LeasableResource = (*Executable)(nil)
Comment thread
danegsta marked this conversation as resolved.
Loading
Loading