Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
charm.land/bubbletea/v2 v2.0.6
github.com/BurntSushi/toml v1.6.0
github.com/aymanbagabas/go-udiff v0.4.1
github.com/cenkalti/backoff/v4 v4.3.0
github.com/coder/websocket v1.8.14
github.com/dustinkirkland/golang-petname v0.0.0-20260215035315-f0c533e9ce9b
github.com/gin-gonic/gin v1.12.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ github.com/catenacyber/perfsprint v0.10.1 h1:u7Riei30bk46XsG8nknMhKLXG9BcXz3+3tl
github.com/catenacyber/perfsprint v0.10.1/go.mod h1:DJTGsi/Zufpuus6XPGJyKOTMELe347o6akPvWG9Zcsc=
github.com/ccojocar/zxcvbn-go v1.0.4 h1:FWnCIRMXPj43ukfX000kvBZvV6raSxakYr1nzyNrUcc=
github.com/ccojocar/zxcvbn-go v1.0.4/go.mod h1:3GxGX+rHmueTUMvm5ium7irpyjmm7ikxYFOSJB21Das=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
Expand Down
61 changes: 58 additions & 3 deletions internal/sidecar/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ import (
"context"
"errors"
"fmt"
"net"
"os"
"path/filepath"
"strings"
"time"

"github.com/cenkalti/backoff/v4"

"github.com/CircleCI-Public/chunk-cli/internal/circleci"
"github.com/CircleCI-Public/chunk-cli/internal/gitremote"
Expand Down Expand Up @@ -52,7 +56,7 @@ func persistWorkspace(ctx context.Context, workspace string) error {
func Sync(ctx context.Context,
client *circleci.Client, sidecarID, identityFile, authSock, workdir string, status iostream.StatusFunc) error {

session, err := OpenSession(ctx, client, sidecarID, identityFile, authSock)
session, err := openSessionWithRetry(ctx, client, sidecarID, identityFile, authSock, status)
if err != nil {
return err
}
Expand Down Expand Up @@ -155,10 +159,20 @@ func syncWorkspace(ctx context.Context, status iostream.StatusFunc, org, repo, r

status(iostream.LevelInfo, fmt.Sprintf("Synchronising local %s/%s to remote: %s...", org, repo, repoPath))

base, err := gitutil.MergeBase()
// Prefer origin/HEAD (set when the remote advertises a default branch), fall
// back to HEAD for repos where origin/HEAD is not configured.
baseCmd := fmt.Sprintf(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where was the git fetch happening? We were only calling rev-parse and merge-base which shouldn't be doing a fetch?

"git -C %[1]s rev-parse origin/HEAD 2>/dev/null || git -C %[1]s rev-parse HEAD",
ShellEscape(repoPath),
)
baseResult, err := ExecOverSSH(ctx, session, baseCmd, nil, nil)
if err != nil {
return &RemoteBaseError{Err: err}
return fmt.Errorf("sync: resolve remote base: %w", err)
}
if baseResult.ExitCode != 0 {
return &RemoteBaseError{Err: fmt.Errorf("resolve base commit: %s", baseResult.Stderr)}
}
base := strings.TrimSpace(baseResult.Stdout)

patch, err := gitutil.GeneratePatch(base)
if err != nil {
Expand Down Expand Up @@ -201,3 +215,44 @@ func syncWorkspace(ctx context.Context, status iostream.StatusFunc, org, repo, r
}
return nil
}

// openSessionWithRetry calls OpenSession, retrying on transient errors to give
// a newly-created sidecar time to finish booting before its SSH service is ready.
func openSessionWithRetry(ctx context.Context, client *circleci.Client, sidecarID, identityFile, authSock string, status iostream.StatusFunc) (*Session, error) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@danmux or @pete-woods do we have a standard pattern or lib for retries? this feels just like something we would have solved elsewhere...

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is using the standard lib for retries now! Thanks @danmux for the link

b := backoff.NewExponentialBackOff()
b.InitialInterval = 2 * time.Second
b.MaxInterval = 15 * time.Second
b.MaxElapsedTime = 90 * time.Second

var session *Session
notified := false
err := backoff.RetryNotify(
func() error {
var e error
session, e = OpenSession(ctx, client, sidecarID, identityFile, authSock)
if e != nil && !isTransientSSHError(e) {
return backoff.Permanent(e)
}
return e
},
backoff.WithContext(b, ctx),
func(_ error, _ time.Duration) {
if !notified {
status(iostream.LevelInfo, "Waiting for sidecar SSH to become available...")
notified = true
}
},
)
if err != nil {
return nil, err
}
return session, nil
}

// isTransientSSHError reports whether err should be treated as a retryable,
// network-level failure while opening a session. An error qualifies when it,
// or any error in its wrap chain, satisfies the net.Error interface; all other
// errors, including nil, are reported as non-transient.
func isTransientSSHError(err error) bool {
	// errors.As needs a non-nil pointer to the interface type to match against.
	asNetErr := new(net.Error)
	return errors.As(err, asNetErr)
}
13 changes: 9 additions & 4 deletions internal/sidecar/sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,16 @@ import (
func TestSync_NonApplyFailureReturnsImmediately(t *testing.T) {
keyFile, pubKey := fakes.GenerateSSHKeypair(t)

// SSH server: all commands succeed (exitCode 0), so mkdir-p and test-d pass.
// syncWorkspace then calls gitutil.MergeBase(), which fails because the test
// repo has no upstream tracking branch — a non-errApplyFailed error.
// SSH server: mkdir-p and test-d succeed; git rev-parse fails (exit 1) to
// simulate a sidecar where origin/HEAD is not configured — a RemoteBaseError,
// which is a non-errApplyFailed error and must not trigger rm -rf.
sshSrv := fakes.NewSSHServer(t, pubKey)
sshSrv.SetResult("", 0)
sshSrv.SetResultFunc(func(cmd string) (string, int) {
if strings.Contains(cmd, "rev-parse") {
return "fatal: ambiguous argument 'origin/HEAD'", 1
}
return "", 0
})

cci := fakes.NewFakeCircleCI()
cci.AddKeyURL = sshSrv.Addr()
Expand Down
61 changes: 61 additions & 0 deletions internal/sidecar/sync_whitebox_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package sidecar

import (
"fmt"
"net"
"testing"

"gotest.tools/v3/assert"

"github.com/CircleCI-Public/chunk-cli/internal/circleci"
)

// TestIsTransientSSHError checks the classification made by
// isTransientSSHError: errors that are (or wrap) a net.Error must be reported
// as transient, while API, key-lookup and plain errors must not.
func TestIsTransientSSHError(t *testing.T) {
	cases := []struct {
		name string
		err  error
		want bool
	}{
		{
			name: "timeout is transient",
			err:  &net.OpError{Op: "dial", Err: &timeoutError{}},
			want: true,
		},
		{
			name: "connection refused is transient",
			err:  &net.OpError{Op: "dial", Net: "tcp", Err: fmt.Errorf("connection refused")},
			want: true,
		},
		{
			// The net error sits one level down the wrap chain.
			name: "net error wrapped with fmt.Errorf is transient",
			err:  fmt.Errorf("register SSH key: %w", &net.OpError{Op: "dial", Err: &timeoutError{}}),
			want: true,
		},
		{
			name: "ErrNotAuthorized is not transient",
			err:  fmt.Errorf("add ssh key: %w", circleci.ErrNotAuthorized),
			want: false,
		},
		{
			name: "StatusError is not transient",
			err:  &circleci.StatusError{Op: "add ssh key", StatusCode: 503},
			want: false,
		},
		{
			name: "KeyNotFoundError is not transient",
			err:  &KeyNotFoundError{Path: "/home/user/.ssh/chunk_ai"},
			want: false,
		},
		{
			name: "PublicKeyNotFoundError is not transient",
			err:  &PublicKeyNotFoundError{KeyPath: "/home/user/.ssh/chunk_ai.pub"},
			want: false,
		},
		{
			name: "generic error is not transient",
			err:  fmt.Errorf("resolve home directory: permission denied"),
			want: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, isTransientSSHError(tc.err), tc.want)
		})
	}
}

// timeoutError is a stateless test double that provides the Error, Timeout
// and Temporary methods of net.Error, with both predicates reporting true.
type timeoutError struct{}

// Error returns the fixed message "i/o timeout".
func (timeoutError) Error() string {
	return "i/o timeout"
}

// Timeout always reports true.
func (timeoutError) Timeout() bool {
	return true
}

// Temporary always reports true.
func (timeoutError) Temporary() bool {
	return true
}
15 changes: 15 additions & 0 deletions internal/testing/fakes/ssh.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type SSHServer struct {
stdout string
exitCode int
commands []string
resultFn func(cmd string) (stdout string, exitCode int)
}

// GenerateSSHKeypair generates an ed25519 keypair, writes the private and public
Expand Down Expand Up @@ -121,6 +122,15 @@ func (s *SSHServer) SetResult(stdout string, exitCode int) {
s.exitCode = exitCode
}

// SetResultFunc installs a per-command handler so that different exec
// commands can receive different results.
//
// While a non-nil handler is installed, handleSession calls it for every exec
// command and uses the stdout and exit code it returns in place of the values
// configured through SetResult. There is no per-command fallback to SetResult,
// so the handler must itself return the desired default for commands it does
// not single out. Passing nil removes the handler, after which the SetResult
// values apply again.
func (s *SSHServer) SetResultFunc(fn func(cmd string) (stdout string, exitCode int)) {
	// resultFn is read under the same mutex by handleSession.
	s.mu.Lock()
	defer s.mu.Unlock()
	s.resultFn = fn
}

// Commands returns a copy of all exec command strings received so far.
func (s *SSHServer) Commands() []string {
s.mu.Lock()
Expand Down Expand Up @@ -190,8 +200,13 @@ func (s *SSHServer) handleSession(ch ssh.Channel, requests <-chan *ssh.Request)
s.commands = append(s.commands, cmd)
stdout := s.stdout
exitCode := s.exitCode
fn := s.resultFn
s.mu.Unlock()

if fn != nil {
stdout, exitCode = fn(cmd)
}

if req.WantReply {
_ = req.Reply(true, nil)
}
Expand Down