diff --git a/.github/workflows/integration-v2.yml b/.github/workflows/integration-v2.yml index 4b2b11e7..1cebf9b5 100644 --- a/.github/workflows/integration-v2.yml +++ b/.github/workflows/integration-v2.yml @@ -56,11 +56,20 @@ jobs: echo "::add-mask::$TOKEN" echo "TOKEN=$TOKEN" >> "$GITHUB_OUTPUT" + - name: Extract c4gh pubkey + id: pubkey + run: | + docker compose --project-name download-v2-dev cp reencrypt:/shared/c4gh.pub.pem ./c4gh.pub.pem + KEY=$(base64 -w0 ./c4gh.pub.pem) + echo "::add-mask::$KEY" + echo "KEY=$KEY" >> "$GITHUB_OUTPUT" + - name: Run v2 integration tests working-directory: sda-cli env: DOWNLOAD_V2_URL: http://localhost:8085 DOWNLOAD_V2_TOKEN: ${{ steps.token.outputs.TOKEN }} + DOWNLOAD_V2_PUBKEY_B64: ${{ steps.pubkey.outputs.KEY }} run: | go test -tags=integration -v ./... diff --git a/download/download.go b/download/download.go index 45997f52..00944894 100644 --- a/download/download.go +++ b/download/download.go @@ -3,15 +3,11 @@ package download import ( "bufio" "context" - "encoding/json" "errors" "fmt" "io" - "net/http" - "net/url" "os" "path/filepath" - "slices" "strings" "time" @@ -21,8 +17,6 @@ import ( "github.com/spf13/cobra" "github.com/vbauerster/mpb/v8" "github.com/vbauerster/mpb/v8/decor" - "go.nhat.io/cookiejar" - "golang.org/x/net/publicsuffix" ) var datasetID string @@ -40,11 +34,11 @@ var apiVersionFlag string var downloadCmd = &cobra.Command{ Use: "download [flags] [filepath(s) | fileid(s)]", Short: "Download files from SDA", - Long: `Download files from the Sensitive Data Archive (SDA) using APIs at the specified URL. + Long: `Download files from the Sensitive Data Archive (SDA) using APIs at the specified URL. The user must have the necessary access rights (visas) to the datasets being downloaded Important: Provide exactly one of the following options to specify files to download: - - [filepath(s) or fileid(s)] + - [filepath(s) or fileid(s)] - --dataset - --recursive - --from-file `, @@ -73,46 +67,13 @@ func init() { downloadCmd.Flags().StringVar(&apiVersionFlag, "api-version", "v1", "SDA download API version to use (v1 or v2)") } -var cookieJar *cookiejar.PersistentJar -var cookiePath string -var appVersion string - -// File is the file metadata type. Canonical definition lives in downloadclient. -// Alias preserves source compat for existing callers (list/, tests). Removed -// in #677 when callers reference downloadclient.File directly. -type File = downloadclient.File - // Download function downloads files from the SDA by using the // download's service APIs func Download(args []string, configPath, version string) error { - appVersion = version - if datasetID == "" || URL == "" || configPath == "" { return errors.New("missing required arguments, dataset-id, config and url are required") } - // Fail fast on an unsupported --api-version before we touch the - // filesystem via setupCookieJar. Cheap check; avoids creating - // ${UserCacheDir}/sda-cli/ when the command is about to error out. - if err := downloadclient.ValidateVersion(apiVersionFlag); err != nil { - return err - } - - // v2 list commands work as of #676, but the actual download still goes - // through the legacy /s3 transfer path. Without this guard, a successful - // v2 ListFiles would be followed by a silent 404 on /s3/{fileID} once - // the real v2 dev stack is targeted. Reject cleanly until #677 wires - // v2 download via /files/{fileId} + X-C4GH-Public-Key. - if apiVersionFlag == "v2" { - return errors.New("v2 download is not yet implemented (see #677)") - } - - u, err := url.Parse(URL) - if err != nil || u.Scheme == "" { - return errors.New("invalid base URL") - } - setupCookieJar(u) - // Check if both --ignore-existing and --overwrite-existing are set if ignoreExisting && overwriteExisting { return errors.New("both --ignore-existing and --overwrite-existing flags are set, choose one of them") @@ -155,16 +116,11 @@ func Download(args []string, configPath, version string) error { return err } - // Share the cookie jar that downloadFile uses so V1Client's metadata - // calls and the legacy /s3 transfer path see the same in-memory - // cookie state. Two independent AutoSync:ed jars on the same on-disk - // file would race and clobber each other (fixed here; removed in - // #677 when downloadFile also moves onto downloadclient.Client). client, err := downloadclient.New(downloadclient.Config{ BaseURL: URL, Token: config.AccessToken, ClientVersion: version, - }, apiVersionFlag, downloadclient.WithV1CookieJar(cookieJar)) + }, apiVersionFlag) if err != nil { return err } @@ -174,22 +130,22 @@ func Download(args []string, configPath, version string) error { switch { case datasetDownload: - err = datasetCase(ctx, client, config.AccessToken) + err = datasetCase(ctx, client) if err != nil { return err } case recursiveDownload: - err = recursiveCase(ctx, client, args, config.AccessToken) + err = recursiveCase(ctx, client, args) if err != nil { return err } case fromFile: - err = fileCase(ctx, client, args, config.AccessToken, true) + err = fileCase(ctx, client, args, true) if err != nil { return err } default: - err = fileCase(ctx, client, args, config.AccessToken, false) + err = fileCase(ctx, client, args, false) if err != nil { return err } @@ -198,7 +154,7 @@ func Download(args []string, configPath, version string) error { return nil } -func datasetCase(ctx context.Context, client downloadclient.Client, token string) error { +func datasetCase(ctx context.Context, client downloadclient.Client) error { fmt.Println("Downloading all files in the dataset") files, err := client.ListFiles(ctx, datasetID, downloadclient.ListFilesOptions{}) if err != nil { @@ -206,11 +162,7 @@ func datasetCase(ctx context.Context, client downloadclient.Client, token string } for _, file := range files { - fileName := helpers.AnonymizeFilepath(file.FilePath) - fileURL := URL + "/s3/" + datasetID + "/" + fileName - - err = downloadFile(fileURL, token, pubKeyBase64, file.FilePath) - if err != nil { + if err := downloadOne(ctx, client, file.FilePath); err != nil { return err } } @@ -218,12 +170,8 @@ func datasetCase(ctx context.Context, client downloadclient.Client, token string return nil } -func recursiveCase(ctx context.Context, client downloadclient.Client, args []string, token string) error { +func recursiveCase(ctx context.Context, client downloadclient.Client, args []string) error { fmt.Println("Downloading content of the path(s)") - files, err := client.ListFiles(ctx, datasetID, downloadclient.ListFilesOptions{}) - if err != nil { - return fmt.Errorf("failed to get files, reason: %v", err) - } var dirPaths []string for _, path := range args { @@ -232,25 +180,44 @@ func recursiveCase(ctx context.Context, client downloadclient.Client, args []str } dirPaths = append(dirPaths, path) } + + // v1 has no server-side prefix filter, so fetch the full list once and + // filter client-side. v2 has a pathPrefix filter; apply it per dirPath. + var allFiles []downloadclient.File + if apiVersionFlag != "v2" { + files, err := client.ListFiles(ctx, datasetID, downloadclient.ListFilesOptions{}) + if err != nil { + return fmt.Errorf("failed to get files, reason: %v", err) + } + allFiles = files + } + var missingPaths []string - // Loop over all the files of the dataset and - // check if the provided path is part of their filepath. - // If it is then download the file for _, dirPath := range dirPaths { - pathExists := false - for _, file := range files { - if strings.HasPrefix(file.FilePath, dirPath) { - pathExists = true - fileName := helpers.AnonymizeFilepath(file.FilePath) - fileURL := URL + "/s3/" + datasetID + "/" + fileName - err = downloadFile(fileURL, token, pubKeyBase64, file.FilePath) - if err != nil { - return err + var matched []downloadclient.File + if apiVersionFlag == "v2" { + files, err := client.ListFiles(ctx, datasetID, downloadclient.ListFilesOptions{PathPrefix: dirPath}) + if err != nil { + return fmt.Errorf("failed to get files, reason: %v", err) + } + matched = files + } else { + for _, f := range allFiles { + if strings.HasPrefix(f.FilePath, dirPath) { + matched = append(matched, f) } } } - if !pathExists { + + if len(matched) == 0 { missingPaths = append(missingPaths, dirPath) + + continue + } + for _, file := range matched { + if err := downloadOne(ctx, client, file.FilePath); err != nil { + return err + } } } if len(missingPaths) == len(dirPaths) { @@ -265,47 +232,22 @@ func recursiveCase(ctx context.Context, client downloadclient.Client, args []str return nil } -func fileCase(ctx context.Context, client downloadclient.Client, args []string, token string, fileList bool) error { +func fileCase(ctx context.Context, client downloadclient.Client, args []string, fileList bool) error { var files []string if fileList { fmt.Println("Downloading files from file list") - fileList, err := GetURLsFile(args[0]) + fl, err := GetURLsFile(args[0]) if err != nil { return err } - files = append(files, fileList...) + files = append(files, fl...) } else { fmt.Println("Downloading files") files = append(files, args...) } for _, filePath := range files { - outputPath := filepath.Join(outDir, filePath) - // Cleanup .part if it exists since we are skipping - partPath := outputPath + ".part" - if _, err := os.Stat(partPath); err == nil { - if err := os.Remove(partPath); err != nil { - fmt.Printf("Warning: could not remove old partial file %s: %v\n", partPath, err) - } - } - - if ignoreExisting { - if _, err := os.Stat(outputPath); err == nil { - fmt.Printf("Skipping download to %s, file already exists\n", outputPath) - - continue - } else if !errors.Is(err, os.ErrNotExist) { - return err - } - } - - fileIDURL, apiFilePath, err := getFileIDURL(ctx, client, URL, datasetID, pubKeyBase64, filePath) - if err != nil { - return err - } - - err = downloadFile(fileIDURL, token, pubKeyBase64, apiFilePath) - if err != nil { + if err := downloadOne(ctx, client, filePath); err != nil { return err } } @@ -313,29 +255,68 @@ func fileCase(ctx context.Context, client downloadclient.Client, args []string, return nil } -func downloadFile(uri, token, pubKeyBase64, filePath string) error { - filePath = helpers.AnonymizeFilepath(filePath) - filePath = filepath.Join(outDir, filePath) - - exists, err := handleExistingFile(filePath) +// downloadOne fetches a single file via the downloadclient.Client and writes it +// to disk under outDir. userArg is either a path or a fileId — the client's +// DownloadFile resolves it and returns the canonical File, which we use for +// the on-disk name (UserArg must not be used for the filename because it +// may be a bare fileId with no relationship to the actual file name). +func downloadOne(ctx context.Context, client downloadclient.Client, userArg string) error { + result, err := client.DownloadFile(ctx, downloadclient.DownloadRequest{ + DatasetID: datasetID, + UserArg: userArg, + PublicKeyBase64: pubKeyBase64, + }) if err != nil { return err } - if exists { - return nil + defer result.Body.Close() //nolint:errcheck + + // Server-provided metadata must not be able to escape the configured + // output directory via "../" segments or absolute paths. filepath.IsLocal + // is the safety boundary: it rejects "..", absolute paths, and (on + // Windows) reserved names. This works correctly even when outDir is + // empty (default) or ".", cases the previous prefix-check rejected. + anonymized := helpers.AnonymizeFilepath(result.File.FilePath) + if !filepath.IsLocal(anonymized) { + return fmt.Errorf("refusing to write outside outdir: %s", result.File.FilePath) + } + od := outDir + if od == "" { + od = "." + } + outputPath := filepath.Join(od, anonymized) + + // Clean up any stale .part from a previous failed run before the + // existing-file check — otherwise a prior .part could be left behind + // when we end up skipping due to ignore-existing. + partPath := outputPath + ".part" + if _, serr := os.Stat(partPath); serr == nil { + if rerr := os.Remove(partPath); rerr != nil { + fmt.Printf("Warning: could not remove old partial file %s: %v\n", partPath, rerr) + } } - bodyStream, totalSize, err := getBody(uri, token, pubKeyBase64) + exists, err := handleExistingFile(outputPath) if err != nil { return err } - defer bodyStream.Close() + if exists { + return nil + } + + return writeBodyToDisk(result.Body, result.ContentLength, outputPath) +} - if err := os.MkdirAll(filepath.Dir(filePath), 0750); err != nil { +// writeBodyToDisk streams the encrypted response body to destPath, driving +// an mpb progress bar sized by totalSize (0 = indeterminate spinner). Writes +// to destPath + ".part" first and renames on success; cleans up the .part +// on failure. +func writeBodyToDisk(body io.Reader, totalSize int64, destPath string) error { + if err := os.MkdirAll(filepath.Dir(destPath), 0750); err != nil { return fmt.Errorf("failed to create directory: %w", err) } - outFile, err := os.Create(filePath + ".part") + outFile, err := os.Create(destPath + ".part") if err != nil { return fmt.Errorf("failed to create partial file: %w", err) } @@ -349,13 +330,13 @@ func downloadFile(uri, token, pubKeyBase64, filePath string) error { }() buf := make([]byte, 1024*1024) - bufReader := bufio.NewReaderSize(bodyStream, 1024*1024) + bufReader := bufio.NewReaderSize(body, 1024*1024) p := mpb.New( mpb.WithRefreshRate(150 * time.Millisecond), ) - fmt.Printf("Downloading file to %s\n", filePath) + fmt.Printf("Downloading file to %s\n", destPath) if totalSize > 0 { if err := downloadWithBar(p, outFile, bufReader, totalSize, buf); err != nil { @@ -374,7 +355,7 @@ func downloadFile(uri, token, pubKeyBase64, filePath string) error { return fmt.Errorf("failed to close partial file %s: %v", outFile.Name(), err) } - if err := os.Rename(outFile.Name(), filePath); err != nil { // #nosec G703 + if err := os.Rename(outFile.Name(), destPath); err != nil { // #nosec G703 return fmt.Errorf("failed to rename partial file %s: %v", outFile.Name(), err) } @@ -425,155 +406,6 @@ func handleExistingFile(filePath string) (bool, error) { return false, nil } -func getFileIDURL(ctx context.Context, client downloadclient.Client, baseURL, dataset, pubKeyBase64, filename string) (string, string, error) { - // Preserve legacy behavior: if baseURL is invalid, return "invalid base URL" - // without wrapping (TestFileIdUrl/InvalidUrl asserts on the bare string). - u, err := url.ParseRequestURI(baseURL) - if err != nil || u.Scheme == "" { - return "", "", errors.New("invalid base URL") - } - - // Forward the caller's pubkey on v1 so the Client-Public-Key header is - // emitted on /files listing — matches the original download.getFileIDURL → - // GetFilesInfo → getBody wire behavior. V2 ignores LegacyV1PubKey. - datasetFiles, err := client.ListFiles(ctx, dataset, downloadclient.ListFilesOptions{ - LegacyV1PubKey: pubKeyBase64, - }) - if err != nil { - return "", "", fmt.Errorf("failed to get files, reason: %v", err) - } - - var idx int - switch { - case strings.Contains(filename, "/"): - if !strings.HasSuffix(filename, ".c4gh") { - filename += ".c4gh" - } - idx = slices.IndexFunc( - datasetFiles, - func(f File) bool { return strings.Contains(f.FilePath, filename) }, - ) - default: - idx = slices.IndexFunc( - datasetFiles, - func(f File) bool { return strings.Contains(f.FileID, filename) }, - ) - } - - if idx == -1 { - return "", "", fmt.Errorf("File not found in dataset %s", filename) - } - - fileName := helpers.AnonymizeFilepath(datasetFiles[idx].FilePath) - fileURL := baseURL + "/s3/" + dataset + "/" + fileName - - return fileURL, fileName, nil -} - -// GetDatasets is retained for backward compatibility with list/ and -// download_test.go. Deprecated: new code should call downloadclient.Client -// via downloadclient.New(...). Removed in #677 when callers finish migrating. -func GetDatasets(baseURL, token, version string) ([]string, error) { - // URL-parse errors are returned unwrapped to preserve legacy behavior - // (TestListDatasetsNoUrl asserts on the bare "invalid base URL" string). - u, err := url.ParseRequestURI(baseURL) - if err != nil || u.Scheme == "" { - return nil, errors.New("invalid base URL") - } - - c := downloadclient.NewV1Client(downloadclient.Config{ - BaseURL: baseURL, - Token: token, - ClientVersion: version, - }, nil) - - datasets, err := c.ListDatasets(context.Background()) - if err != nil { - // Preserve pre-abstraction error shape: transport/status failures were - // wrapped as "failed to get datasets, reason: ..."; parse errors - // already carry their own "failed to parse ..." prefix from - // V1Client.ListDatasets, so pass those through untouched to avoid - // double-wrapping. - if strings.HasPrefix(err.Error(), "failed to parse") { - return nil, err - } - - return nil, fmt.Errorf("failed to get datasets, reason: %v", err) - } - - return datasets, nil -} - -// GetFilesInfo is retained for backward compatibility. Preserves v1's -// error-prefix behavior ("failed to get files, reason: ...") that -// existing tests like TestInvalidUrl rely on. Deprecated: call -// downloadclient.Client.ListFiles instead. Removed in #677. -func GetFilesInfo(baseURL, dataset, pubKeyBase64, token, version string) ([]File, error) { - // URL-parse errors are returned unwrapped to preserve legacy behavior - // (tests like TestFileIdUrl/InvalidUrl and TestListDatasetNoUrl rely on - // the bare "invalid base URL" string). - u, err := url.ParseRequestURI(baseURL) - if err != nil || u.Scheme == "" { - return nil, errors.New("invalid base URL") - } - - c := downloadclient.NewV1Client(downloadclient.Config{ - BaseURL: baseURL, - Token: token, - ClientVersion: version, - }, nil) - files, err := c.ListFiles(context.Background(), dataset, downloadclient.ListFilesOptions{ - LegacyV1PubKey: pubKeyBase64, - }) - if err != nil { - // Same shape discrimination as GetDatasets: parse errors from - // V1Client already carry "failed to parse ..." prefixes, so avoid - // double-wrapping by passing those through. Transport/status - // failures keep the legacy "failed to get files, reason: ..." - // wrapper that callers and TestInvalidUrl depend on. - if strings.HasPrefix(err.Error(), "failed to parse") { - return nil, err - } - - return nil, fmt.Errorf("failed to get files, reason: %v", err) - } - - return files, nil -} - -// getBody returns a stream of the response body and its size -func getBody(requestURL, token, pubKeyBase64 string) (io.ReadCloser, int64, error) { - req, err := http.NewRequest("GET", requestURL, nil) - if err != nil { - return nil, 0, fmt.Errorf("failed to create request, reason: %v", err) - } - - req.Header.Add("SDA-Client-Version", appVersion) - req.Header.Add("Authorization", "Bearer "+token) - req.Header.Add("Content-Type", "application/json") - if pubKeyBase64 != "" { - req.Header.Add("Client-Public-Key", pubKeyBase64) - } - - client := &http.Client{Jar: cookieJar} - res, err := client.Do(req) // #nosec G704 - if err != nil { - return nil, 0, fmt.Errorf("failed to get response, reason: %v", err) - } - - if res.StatusCode != http.StatusOK { - defer res.Body.Close() - resBody, _ := io.ReadAll(res.Body) - if res.StatusCode == http.StatusPreconditionFailed { - return nil, 0, errors.New(strings.TrimSpace(string(resBody))) - } - - return nil, 0, fmt.Errorf("server returned status %d", res.StatusCode) - } - - return res.Body, res.ContentLength, nil -} - func GetURLsFile(urlsFilePath string) (urlsList []string, err error) { urlsFile, err := os.Open(filepath.Clean(urlsFilePath)) if err != nil { @@ -592,36 +424,6 @@ func GetURLsFile(urlsFilePath string) (urlsList []string, err error) { return urlsList, scanner.Err() } -func setupCookieJar(u *url.URL) { - if cd, err := os.UserCacheDir(); err != nil { - fmt.Fprintln(os.Stderr, "cache dir not set, using current dir") - cookiePath, _ = filepath.Abs(".sda_cookie") - } else { - if err := os.MkdirAll(filepath.Join(cd, "sda-cli"), 0750); err != nil { - fmt.Fprintln(os.Stderr, "failed to create cache dir, using current dir") - cookiePath, _ = filepath.Abs(".sda_cookie") - } else { - cookiePath = filepath.Join(cd, "sda-cli/sda_cookie") - } - } - cookieJar = cookiejar.NewPersistentJar( - cookiejar.WithFilePath(cookiePath), - cookiejar.WithAutoSync(true), - cookiejar.WithPublicSuffixList(publicsuffix.List), - ) - if _, err := os.Stat(cookiePath); err == nil { - cookieString, err := os.ReadFile(cookiePath) - if err != nil { - fmt.Fprintf(os.Stderr, "failed to read cookie file: %s", err.Error()) - } - - var parsedCookies []*http.Cookie - if err := json.Unmarshal(cookieString, &parsedCookies); err == nil && len(parsedCookies) > 0 { - cookieJar.SetCookies(u, parsedCookies) - } - } -} - func downloadWithBar(p *mpb.Progress, outFile *os.File, reader io.Reader, totalSize int64, buf []byte) error { bar := p.AddBar(totalSize, mpb.PrependDecorators( diff --git a/download/download_test.go b/download/download_test.go index a9a7c69e..2e6de58b 100644 --- a/download/download_test.go +++ b/download/download_test.go @@ -6,15 +6,11 @@ import ( "crypto/rsa" "errors" "fmt" - "io" "net/http" "net/http/httptest" - "net/url" "os" "path" "path/filepath" - "runtime" - "strings" "testing" "time" @@ -127,9 +123,6 @@ encrypt = False s.FailNow("failed to write to config file", err) } - u, _ := url.Parse("http://localhost") - setupCookieJar(u) - s.testKeyFile = filepath.Join(s.tempDir, "testkey") err = createkey.GenerateKeyPair(s.testKeyFile, "test") assert.NoError(s.T(), err) @@ -147,16 +140,32 @@ func (s *DownloadTestSuite) TestInvalidUrl() { assert.Contains( s.T(), err.Error(), - "failed to get files, reason: failed to get response, reason: Get \"https://some/url/metadata/datasets/TES01/files\": dial tcp: lookup some", + "failed to get response, reason: Get \"https://some/url/metadata/datasets/TES01/files\": dial tcp: lookup some", ) } -func (s *DownloadTestSuite) TestDownload_APIVersionV2_RejectedUntilPR677() { - // v2 list commands are live as of #676, but download still uses the - // legacy /s3 transfer. Download() rejects --api-version v2 early so - // users don't silently half-succeed (v2 ListFiles OK) then hit a - // cryptic 404 on /s3/{fileID}. The real v2 download path arrives in - // #677. +func (s *DownloadTestSuite) TestDownload_APIVersionV2_MissingPubkey() { + // v2 requires --pubkey. Without one, V2Client.DownloadFile errors before + // any HTTP request. + oldDatasetID, oldURL, oldAPIVersion := datasetID, URL, apiVersionFlag + datasetID = "TES01" + URL = s.httpTestServer.URL + apiVersionFlag = "v2" + defer func() { + datasetID, URL, apiVersionFlag = oldDatasetID, oldURL, oldAPIVersion + }() + + err := Download([]string{"files/file1.c4gh"}, s.configFilePath, "test") + require.Error(s.T(), err) + assert.Contains(s.T(), err.Error(), "v2 downloads require --pubkey") +} + +func (s *DownloadTestSuite) TestDownload_APIVersionV2_HitsV2Endpoint() { + // v2 factory returns a real V2Client. For the single-file path, the + // flow is downloadOne -> V2Client.DownloadFile -> resolveFile -> + // ListFiles which issues GET /datasets/{id}/files. The mock server has + // no v2 handler, so decoding its default non-JSON body fails — proving + // v2 is wired up. oldDatasetID, oldURL, oldAPIVersion := datasetID, URL, apiVersionFlag datasetID = "TES01" URL = s.httpTestServer.URL @@ -165,9 +174,13 @@ func (s *DownloadTestSuite) TestDownload_APIVersionV2_RejectedUntilPR677() { datasetID, URL, apiVersionFlag = oldDatasetID, oldURL, oldAPIVersion }() + oldPubKey := pubKey + pubKey = fmt.Sprintf("%s.pub.pem", s.testKeyFile) + defer func() { pubKey = oldPubKey }() + err := Download([]string{"files/file1.c4gh"}, s.configFilePath, "test") require.Error(s.T(), err) - assert.Contains(s.T(), err.Error(), "v2 download is not yet implemented") + assert.Contains(s.T(), err.Error(), "failed to decode /datasets/TES01/files response") } func (s *DownloadTestSuite) TestDownloadOneFileWithPublicKey() { @@ -186,6 +199,32 @@ func (s *DownloadTestSuite) TestDownloadOneFileWithPublicKey() { assert.Equal(s.T(), "test content dummy file", string(downloadedContent)) } +// TestDownloadDefaultOutdir guards that the default --outdir "" writes to +// the current working directory. The previous prefix-based escape check +// rejected this case (filepath.Clean("") = "." and filepath.Join strips +// the leading "./", so strings.HasPrefix on the cleaned path was always +// false), meaning every download without --outdir failed AFTER the body +// stream was already opened on the server side. +func (s *DownloadTestSuite) TestDownloadDefaultOutdir() { + cwd, err := os.Getwd() + require.NoError(s.T(), err) + runDir := filepath.Join(s.tempDir, "defaultoutdir-run") + require.NoError(s.T(), os.MkdirAll(runDir, 0750)) + require.NoError(s.T(), os.Chdir(runDir)) + defer os.Chdir(cwd) //nolint:errcheck + + os.Args = []string{"", "download", "files/dummy-file.txt.c4gh"} + downloadCmd.Flag("pubkey").Value.Set(fmt.Sprintf("%s.pub.pem", s.testKeyFile)) + downloadCmd.Flag("url").Value.Set(s.httpTestServer.URL) + downloadCmd.Flag("outdir").Value.Set("") // explicit default + downloadCmd.Flag("dataset-id").Value.Set("TES01") + require.NoError(s.T(), downloadCmd.Execute()) + + downloaded, err := os.ReadFile(filepath.Join(runDir, "files", "dummy-file.txt.c4gh")) + require.NoError(s.T(), err) + assert.Equal(s.T(), "test content dummy file", string(downloaded)) +} + func (s *DownloadTestSuite) TestDownloadFileAlreadyExistsWithContinue() { if err := os.Mkdir(path.Join(s.tempDir, "files"), 0750); err != nil { s.FailNow("failed to create temporary directory", err) @@ -282,306 +321,39 @@ func generateDummyToken(t *testing.T) string { return accessToken } -func (s *DownloadTestSuite) TestGetFilesInfo() { - files, err := GetFilesInfo(s.httpTestServer.URL, "TES01", "", s.accessToken, "test-version") - require.NoError(s.T(), err) - require.Len(s.T(), files, 3) - assert.Equal(s.T(), "file1id", files[0].FileID) - assert.Equal(s.T(), "file1.c4gh", files[0].DisplayFileName) - assert.Equal(s.T(), "files/file1.c4gh", files[0].FilePath) - assert.Equal(s.T(), "file2id", files[1].FileID) - assert.Equal(s.T(), "file2.c4gh", files[1].DisplayFileName) - assert.Equal(s.T(), "files/file2.c4gh", files[1].FilePath) - assert.Equal(s.T(), "dummyFile", files[2].FileID) - assert.Equal(s.T(), "dummy-file.txt.c4gh", files[2].DisplayFileName) - assert.Equal(s.T(), "files/dummy-file.txt.c4gh", files[2].FilePath) -} - -func (s *DownloadTestSuite) TestFileIdUrl() { - for _, test := range []struct { - testName, baseURL, datasetID, filePath string - expectedURL string - expectedError error - }{ - { - testName: "ValidInputNoPubKey", - baseURL: s.httpTestServer.URL, - datasetID: "TES01", - filePath: "files/file1", - expectedURL: fmt.Sprintf("%s/s3/TES01/files/file1.c4gh", s.httpTestServer.URL), - expectedError: nil, - }, { - testName: "UnknownFilePath", - baseURL: s.httpTestServer.URL, - datasetID: "TES01", - filePath: "files/unknown", - expectedURL: "", - expectedError: errors.New("File not found in dataset files/unknown.c4gh"), - }, { - testName: "FileIdInFilePath", - baseURL: s.httpTestServer.URL, - datasetID: "TES01", - filePath: "file1id", - expectedURL: fmt.Sprintf("%s/s3/TES01/files/file1.c4gh", s.httpTestServer.URL), - expectedError: nil, - }, { - testName: "InvalidUrl", - baseURL: "some/url", - datasetID: "TES01", - filePath: "file1id", - expectedURL: "", - expectedError: errors.New("invalid base URL"), - }, - } { - s.T().Run(test.testName, func(t *testing.T) { - client := downloadclient.NewV1Client(downloadclient.Config{ - BaseURL: test.baseURL, - Token: s.accessToken, - ClientVersion: "test", - }, nil) - client.SetHTTPClientForTest(s.httpTestServer.Client()) - url, _, err := getFileIDURL(context.Background(), client, test.baseURL, test.datasetID, "", test.filePath) - assert.Equal(t, test.expectedError, err) - assert.Equal(t, test.expectedURL, url) - }) - } -} - -func (s *DownloadTestSuite) TestGetDatasets() { - datasets, err := GetDatasets(s.httpTestServer.URL, s.accessToken, "test-version") - require.NoError(s.T(), err) - assert.Equal(s.T(), datasets, []string{"https://doi.example/ty009.sfrrss/600.45asasga"}) -} - -// Guards the pre-abstraction error shape: transport/status failures from -// GetDatasets were wrapped as "failed to get datasets, reason: ...". -// This test ensures the shim still preserves that prefix after the -// downloadclient refactor. -func (s *DownloadTestSuite) TestGetDatasets_WrapsTransportError() { - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusForbidden) - })) - defer ts.Close() - - _, err := GetDatasets(ts.URL, s.accessToken, "test-version") - require.Error(s.T(), err) - assert.Contains(s.T(), err.Error(), "failed to get datasets, reason:") - assert.Contains(s.T(), err.Error(), "status 403") -} - -// Same parity guard for GetFilesInfo: pre-abstraction, transport/status -// failures got a "failed to get files, reason: ..." prefix while parse -// errors kept their own "failed to parse file list JSON, reason: ..." -// shape. The shim must not double-wrap parse errors. -func (s *DownloadTestSuite) TestGetFilesInfo_WrapsTransportError() { - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusForbidden) - })) - defer ts.Close() - - _, err := GetFilesInfo(ts.URL, "TES01", "", s.accessToken, "test-version") - require.Error(s.T(), err) - assert.Contains(s.T(), err.Error(), "failed to get files, reason:") - assert.Contains(s.T(), err.Error(), "status 403") -} - -func (s *DownloadTestSuite) TestGetFilesInfo_PassesParseErrorThrough() { - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - fmt.Fprint(w, "not json") - })) - defer ts.Close() - - _, err := GetFilesInfo(ts.URL, "TES01", "", s.accessToken, "test-version") - require.Error(s.T(), err) - assert.Contains(s.T(), err.Error(), "failed to parse file list") - assert.NotContains(s.T(), err.Error(), "failed to get files, reason: failed to parse", - "parse errors must not be double-wrapped") +// errReader returns some data then an error on the next Read, so +// writeBodyToDisk triggers its cleanup defer. +type errReader struct { + data []byte + err error + sent bool } -func (s *DownloadTestSuite) TestGetBodyNoPublicKey() { - bodyStream, size, err := getBody(s.httpTestServer.URL, "test-token", "") - if err != nil { - s.T().Errorf("getBody returned an error: %v", err) +func (r *errReader) Read(p []byte) (int, error) { + if !r.sent { + r.sent = true + n := copy(p, r.data) - return // Exit early if there's an error to avoid nil pointer panics below + return n, nil } - defer bodyStream.Close() - - body, err := io.ReadAll(bodyStream) - if err != nil { - s.T().Errorf("failed to read from bodyStream: %v", err) - } - - expectedBody := "test response" - if string(body) != expectedBody { - s.T().Errorf("getBody returned incorrect response body, got: %s, want: %s", string(body), expectedBody) - } - - if size != int64(len(expectedBody)) && size != -1 { - s.T().Logf("Note: size returned (%d) does not match expected length (%d)", size, len(expectedBody)) - } + return 0, r.err } -func (s *DownloadTestSuite) TestGetBodyWithPublicKey() { - bodyStream, _, err := getBody(s.httpTestServer.URL, "test-token", "test-public-key") +func (s *DownloadTestSuite) TestWriteBodyToDiskCleanupOnFailure() { + destPath := filepath.Join(s.tempDir, "cleanup-test.c4gh") - if err != nil { - s.T().Fatalf("getBody returned an error: %v", err) - } - - defer bodyStream.Close() - - body, err := io.ReadAll(bodyStream) - if err != nil { - s.T().Fatalf("failed to read from bodyStream: %v", err) - } - - expectedBody := "test response" - if string(body) != expectedBody { - s.T().Errorf("getBody returned incorrect response body, got: %s, want: %s", string(body), expectedBody) - } -} - -func (s *DownloadTestSuite) TestGetBodyPreconditionFailed() { - // Test the specific 412 logic where the body becomes the error message - errorMessage := "error message with precondition failed" - errorServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusPreconditionFailed) - fmt.Fprint(w, errorMessage) - })) - defer errorServer.Close() + reader := &errReader{data: []byte("partial"), err: errors.New("mid-stream failure")} + err := writeBodyToDisk(reader, int64(len("partial")+10), destPath) + assert.Error(s.T(), err, "expected writeBodyToDisk to return an error when body errors mid-stream") - bodyStream, size, err := getBody(errorServer.URL, "test-token", "") + // Final target should not exist + _, err = os.Stat(destPath) + assert.True(s.T(), os.IsNotExist(err), "the final target file should not exist after a failed download") - assert.Nil(s.T(), bodyStream) // On error, the stream should be nil - assert.Equal(s.T(), int64(0), size) - assert.Error(s.T(), err) - assert.Equal(s.T(), errorMessage, err.Error()) -} - -func (s *DownloadTestSuite) TestSetupCookiejar() { - var testCookie string - switch runtime.GOOS { - case "windows": - testCookie = filepath.Join(s.tempDir, "sda-cli", "sda_cookie") - case "darwin": // macOS - testCookie = filepath.Join(s.tempDir, "Library", "Caches", "sda-cli", "sda_cookie") - default: // Linux and others - testCookie = filepath.Join(s.tempDir, ".cache", "sda-cli", "sda_cookie") - } - pwdCookie, _ := filepath.Abs(".sda_cookie") - for _, test := range []struct { - cachePath string - cookiePath string - cookieString string - createCookie bool - expectedError error - testName string - }{ - { - cachePath: s.tempDir, - cookiePath: testCookie, - cookieString: "", - createCookie: false, - testName: "cookie_file_doesn't_exist", - }, - { - cachePath: "", - cookiePath: pwdCookie, - cookieString: "[{\"Name\":\"test-cookie\", \"Value\":\"current_dir_cookie\"}]", - createCookie: true, - testName: "current_dir_cookie", - }, - { - cachePath: s.tempDir, - cookiePath: testCookie, - cookieString: "[{\"Name\":\"test-cookie\", \"Value\":\"cache_path_cookie\"}]", - createCookie: true, - testName: "cache_path_cookie", - }, - { - cachePath: s.tempDir, - cookiePath: testCookie, - cookieString: "[{\"Name\":\"test-cookie\", \"Value\":\"test-data\",\"Domain\":\"example.org\"}]", - createCookie: true, - testName: "wrong_domain", - }, - { - cachePath: s.tempDir, - cookiePath: testCookie, - cookieString: "[{\"Name\":\"test-cookie\", \"Value\":\"test-data\",\"Expires\":\"2001-01-01T00:00:00Z\"}]", - createCookie: true, - testName: "expired", - }, - { - cachePath: s.tempDir, - cookiePath: testCookie, - cookieString: fmt.Sprintf("[{\"Name\":\"test-cookie\", \"Value\":\"not_expired_cookie\",\"Expires\":\"%s\",\"MaxAge\":0}]", time.Now().AddDate(1, 0, 0).Format(time.RFC3339)), - createCookie: true, - testName: "not_expired_cookie", - }, - { - cachePath: s.tempDir, - cookiePath: testCookie, - cookieString: "[{\"Name\":\"test-cookie\", \"Value\":\"max-age_cookie\",\"Expires\":\"0001-01-01T00:00:00Z\",\"MaxAge\":300}]", - createCookie: true, - testName: "max-age_cookie", - }, - } { - s.T().Run(test.testName, func(t *testing.T) { - if runtime.GOOS == "windows" { - os.Setenv("LocalAppData", test.cachePath) - } else { - os.Setenv("HOME", test.cachePath) - } - if test.createCookie { - cookieFile, _ := filepath.Abs(test.cookiePath) - if err := os.WriteFile(cookieFile, []byte(test.cookieString), 0600); err != nil { - fmt.Fprintln(os.Stderr, "failed to save cookie file ", err.Error()) - } - } - - u, _ := url.Parse(s.httpTestServer.URL) - setupCookieJar(u) - assert.Equal(t, test.cookiePath, cookiePath) - cj := cookieJar.Cookies(u) - - if strings.HasSuffix(test.testName, "cookie") { - assert.Equal(t, "test-cookie", cj[0].Name) - assert.Equal(t, test.testName, cj[0].Value) - assert.Equal(t, "0001-01-01T00:00:00Z", cj[0].Expires.Format(time.RFC3339)) - _ = os.Remove(cookiePath) - } else { - assert.Nil(t, cj) - } - }) - } -} - -func (s *DownloadTestSuite) TestDownloadCleanupOnFailure() { - failServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusInternalServerError) - fmt.Fprint(w, "server error") - })) - defer failServer.Close() - - targetFile := "cleanup-test.c4gh" - fullPath := filepath.Join(s.tempDir, targetFile) - - downloadCmd.Flag("ignore-existing").Value.Set("false") - outDir = s.tempDir - - err := downloadFile(failServer.URL, s.accessToken, "", targetFile) - assert.Error(s.T(), err, "Expected downloadFile to return an error on 500 response") - - // Check that the .part file was cleaned up - _, err = os.Stat(fullPath + ".part") - assert.True(s.T(), os.IsNotExist(err), "The .part file should have been removed by the defer cleanup block") - - // Check that the final target file was not created - _, err = os.Stat(fullPath) - assert.True(s.T(), os.IsNotExist(err), "The final target file should not exist after a failed download") + // .part should have been cleaned up + _, err = os.Stat(destPath + ".part") + assert.True(s.T(), os.IsNotExist(err), "the .part file should have been removed by the defer cleanup block") } func (s *DownloadTestSuite) TestDownloadCleanupPartialFileWhenFullExists() { @@ -639,8 +411,25 @@ func (s *DownloadTestSuite) TestDownloadConflictingFlags() { s.Contains(err.Error(), "both --ignore-existing and --overwrite-existing flags are set, choose one of them") } +// downloadOneWithClient is a small helper for TestDownloadPromptOverwrite. +// It lets the test drive downloadOne directly with a V1Client pointed at +// the suite's test server, so we can exercise the overwrite-prompt logic +// without going through downloadCmd.Execute (which has its own flag +// bookkeeping that's less ergonomic to reset between four sub-cases). +func (s *DownloadTestSuite) downloadOneWithClient(userArg string) error { + client := downloadclient.NewV1Client(downloadclient.Config{ + BaseURL: s.httpTestServer.URL, + Token: s.accessToken, + ClientVersion: "test", + }, nil) + client.SetHTTPClientForTest(s.httpTestServer.Client()) + + return downloadOne(context.Background(), client, userArg) +} + func (s *DownloadTestSuite) TestDownloadPromptOverwrite() { - targetFile := "prompt-test.c4gh" + targetFile := "files/dummy-file.txt.c4gh" + expectedContent := "test content dummy file" fullPath := filepath.Join(s.tempDir, targetFile) originalStdin := os.Stdin defer func() { os.Stdin = originalStdin }() @@ -651,8 +440,15 @@ func (s *DownloadTestSuite) TestDownloadPromptOverwrite() { overwriteExisting = false } + oldDatasetID := datasetID + datasetID = "TES01" + defer func() { datasetID = oldDatasetID }() + outDir = s.tempDir + // Ensure parent dir exists so we can pre-populate the target + s.Require().NoError(os.MkdirAll(filepath.Dir(fullPath), 0750)) + // Test YES resetFlags() err := os.WriteFile(fullPath, []byte("old content"), 0600) @@ -664,13 +460,13 @@ func (s *DownloadTestSuite) TestDownloadPromptOverwrite() { _, _ = w.Write([]byte("y\n")) _ = w.Close() }() - err = downloadFile(s.httpTestServer.URL, s.accessToken, "", targetFile) + err = s.downloadOneWithClient(targetFile) s.NoError(err) // Verify content is overwritten content, err := os.ReadFile(fullPath) s.NoError(err) - s.Equal("test response", string(content)) + s.Equal(expectedContent, string(content)) // Test NO resetFlags() @@ -684,7 +480,7 @@ func (s *DownloadTestSuite) TestDownloadPromptOverwrite() { _ = w2.Close() }() - err = downloadFile(s.httpTestServer.URL, s.accessToken, "", targetFile) + err = s.downloadOneWithClient(targetFile) s.NoError(err) // Verify content is NOT overwritten @@ -704,23 +500,23 @@ func (s *DownloadTestSuite) TestDownloadPromptOverwrite() { _ = w3.Close() }() - err = downloadFile(s.httpTestServer.URL, s.accessToken, "", targetFile) + err = s.downloadOneWithClient(targetFile) s.NoError(err) // Verify content is overwritten s.True(overwriteExisting) content, err = os.ReadFile(fullPath) s.NoError(err) - s.Equal("test response", string(content)) + s.Equal(expectedContent, string(content)) // Subsequent download (overwrite without prompting) err = os.WriteFile(fullPath, []byte("second old content"), 0600) s.Require().NoError(err) - err = downloadFile(s.httpTestServer.URL, s.accessToken, "", targetFile) + err = s.downloadOneWithClient(targetFile) s.NoError(err) content, err = os.ReadFile(fullPath) s.NoError(err) - s.Equal("test response", string(content)) + s.Equal(expectedContent, string(content)) // Test NEVER resetFlags() @@ -734,7 +530,7 @@ func (s *DownloadTestSuite) TestDownloadPromptOverwrite() { _ = w4.Close() }() - err = downloadFile(s.httpTestServer.URL, s.accessToken, "", targetFile) + err = s.downloadOneWithClient(targetFile) s.NoError(err) // Verify content is NOT overwritten @@ -745,7 +541,7 @@ func (s *DownloadTestSuite) TestDownloadPromptOverwrite() { // Subsequent download (skip overwrite without prompting) if !ignoreExisting { - err = downloadFile(s.httpTestServer.URL, s.accessToken, "", targetFile) + err = s.downloadOneWithClient(targetFile) s.NoError(err) } content, err = os.ReadFile(fullPath) diff --git a/download/integration_v2_test.go b/download/integration_v2_test.go index 09d3e139..57b90311 100644 --- a/download/integration_v2_test.go +++ b/download/integration_v2_test.go @@ -4,6 +4,7 @@ package download_test import ( "context" + "io" "os" "testing" @@ -79,3 +80,30 @@ func TestV2_DatasetInfo_Smoke(t *testing.T) { assert.Equal(t, "EGAD00000000001", info.DatasetID) assert.Greater(t, info.FileCount, 0) } + +// TestV2_DownloadFile_EndToEnd exercises the full v2 download path against +// the dev stack: resolve test-file.c4gh via the exact filePath filter, +// follow the server-provided downloadUrl, stream the encrypted bytes. +// Requires DOWNLOAD_V2_PUBKEY_B64 (CI extracts it from the reencrypt +// container's /shared/c4gh.pub.pem). +func TestV2_DownloadFile_EndToEnd(t *testing.T) { + client := buildIntegrationClient(t) + + pubKeyBase64 := os.Getenv("DOWNLOAD_V2_PUBKEY_B64") + require.NotEmpty(t, pubKeyBase64, "DOWNLOAD_V2_PUBKEY_B64 must be set") + + result, err := client.DownloadFile(context.Background(), downloadclient.DownloadRequest{ + DatasetID: "EGAD00000000001", + UserArg: "test-file.c4gh", + PublicKeyBase64: pubKeyBase64, + }) + require.NoError(t, err) + defer result.Body.Close() + + got, err := io.ReadAll(result.Body) + require.NoError(t, err) + assert.Greater(t, len(got), 0) + if result.ContentLength > 0 { + assert.Equal(t, result.ContentLength, int64(len(got)), "body size should match Content-Length") + } +} diff --git a/downloadclient/downloadclient.go b/downloadclient/downloadclient.go index 72ed6cdd..767ba555 100644 --- a/downloadclient/downloadclient.go +++ b/downloadclient/downloadclient.go @@ -4,43 +4,54 @@ import ( "context" "errors" "fmt" + "io" "net/url" - - "go.nhat.io/cookiejar" ) // Config holds per-call configuration. type Config struct { BaseURL string // e.g. "https://download.example.org" Token string // bearer token (raw, no "Bearer " prefix) - ClientVersion string // sda-cli version (e.g. v1.2.3); SDA-Client-Version on v1, User-Agent "sda-cli/" on v2 + ClientVersion string // sda-cli version (e.g. v1.2.3); sent as both SDA-Client-Version and User-Agent on v1 and v2 +} + +// DownloadRequest describes one file to fetch. +type DownloadRequest struct { + // DatasetID — always required. + DatasetID string + // UserArg is the raw positional arg from the CLI: either a file path + // (may contain "/" or end in ".c4gh") or a fileId. Implementations + // disambiguate internally. + UserArg string + // PublicKeyBase64 is the recipient public key (v2 preferred: base64 + // of raw 32-byte X25519 key; v2 legacy: base64 of full PEM text; v1 + // uses whatever download.helpers.GetPublicKey64 produced). + PublicKeyBase64 string } -// Client is the SDA download API abstraction for list-family operations. -// The DownloadFile method joins this interface in #677 alongside v2 -// download implementation. +// DownloadResult bundles the three things a caller needs after a successful +// DownloadFile: the canonical File metadata (authoritative filename), a +// ReadCloser streaming the Crypt4GH-encrypted bytes, and the server's +// Content-Length (0 when absent). Callers must close Body. +type DownloadResult struct { + File File + Body io.ReadCloser + ContentLength int64 +} + +// Client is the SDA download API abstraction for list-family operations +// and file download. type Client interface { ListDatasets(ctx context.Context) ([]string, error) ListFiles(ctx context.Context, datasetID string, opts ListFilesOptions) ([]File, error) DatasetInfo(ctx context.Context, datasetID string) (DatasetInfo, error) -} -// Option customises the Client returned by New. Options only affect the -// versions they apply to; unknown options are silently ignored by -// versions that do not honour them (WithV1CookieJar is v1-only). -type Option func(*clientOpts) - -type clientOpts struct { - v1CookieJar *cookiejar.PersistentJar -} - -// WithV1CookieJar hands V1Client an externally-managed persistent cookie -// jar instead of letting it lazy-init its own. Required when the caller -// (e.g. download/download.go) also runs the legacy downloadFile path so -// metadata listing and /s3 transfer share the same in-memory jar and -// avoid clobbering each other via AutoSync to the shared on-disk file. -func WithV1CookieJar(jar *cookiejar.PersistentJar) Option { - return func(o *clientOpts) { o.v1CookieJar = jar } + // DownloadFile resolves req.UserArg against the dataset and returns a + // DownloadResult. The returned File is the authoritative name to use + // for the on-disk output — callers must not derive the output path + // from req.UserArg, because UserArg may be a fileId with no + // relationship to the filename. + DownloadFile(ctx context.Context, req DownloadRequest) (DownloadResult, error) } // ValidateVersion returns the same error shape as New for a given @@ -58,11 +69,10 @@ func ValidateVersion(apiVersion string) error { } // New returns a Client for the requested apiVersion. "v1" returns a V1Client; -// "v2" returns a V2Client (minimal, some methods are stubs until later PRs -// of issue #663, see V2Client doc). Returns an error if apiVersion is -// unsupported, BaseURL is empty or unparseable, or Token is empty. -// ClientVersion is optional (header only) and not validated. -func New(cfg Config, apiVersion string, opts ...Option) (Client, error) { +// "v2" returns a V2Client. Returns an error if apiVersion is unsupported, +// BaseURL is empty or unparseable, or Token is empty. ClientVersion is +// optional (header only) and not validated. +func New(cfg Config, apiVersion string) (Client, error) { if err := ValidateVersion(apiVersion); err != nil { return nil, err } @@ -70,14 +80,9 @@ func New(cfg Config, apiVersion string, opts ...Option) (Client, error) { return nil, err } - var o clientOpts - for _, opt := range opts { - opt(&o) - } - switch apiVersion { case "v1": - return NewV1Client(cfg, o.v1CookieJar), nil + return NewV1Client(cfg, nil), nil case "v2": return NewV2Client(cfg), nil default: diff --git a/downloadclient/types.go b/downloadclient/types.go index fcae0e1a..74fc6388 100644 --- a/downloadclient/types.go +++ b/downloadclient/types.go @@ -2,17 +2,15 @@ // and shared types. v1 and v2 implementations live in v1.go and v2.go. package downloadclient -// File is the shared metadata type for a dataset file. Fields match v1's -// wire shape. v2 has additional fields (checksums array, downloadUrl) -// that are handled inside V2Client's conversion layer and surfaced -// through this same type where applicable (see #675/#677). +// File is the shared metadata type for a dataset file. type File struct { - FileID string `json:"fileId"` - DisplayFileName string `json:"displayFileName"` - FilePath string `json:"filePath"` - DecryptedFileSize int64 `json:"decryptedFileSize"` - DecryptedFileChecksum string `json:"decryptedFileChecksum"` - DecryptedFileChecksumType string `json:"decryptedFileChecksumType"` + FileID string `json:"fileId"` + DisplayFileName string `json:"displayFileName"` + FilePath string `json:"filePath"` + DecryptedFileSize int64 `json:"decryptedFileSize"` + // downloadURL is v2-only (server-provided relative URL). + // Empty on v1 File values. + downloadURL string } // Checksum is the v2 checksum element. Included for completeness; v1 diff --git a/downloadclient/v1.go b/downloadclient/v1.go index 50988bf9..c42690c0 100644 --- a/downloadclient/v1.go +++ b/downloadclient/v1.go @@ -12,6 +12,7 @@ import ( "path/filepath" "strings" + "github.com/NBISweden/sda-cli/helpers" "go.nhat.io/cookiejar" "golang.org/x/net/publicsuffix" ) @@ -154,6 +155,97 @@ func (c *V1Client) DatasetInfo(_ context.Context, _ string) (DatasetInfo, error) return DatasetInfo{}, ErrNotSupportedOnV1 } +// DownloadFile implements Client. Resolves req.UserArg (either a file +// path or a fileId) via ListFiles + legacy substring match, then GETs +// /s3/{dataset}/{filePath}. Returns the resolved File so callers can +// derive the output filename from canonical metadata rather than from +// UserArg (which may be a bare fileId). Caller is responsible for +// closing the returned body. +func (c *V1Client) DownloadFile(ctx context.Context, req DownloadRequest) (DownloadResult, error) { + // Forward the caller-supplied pubkey so the metadata GET emits + // Client-Public-Key — mirrors legacy getFileIDURL → GetFilesInfo. + // Wrap list-resolution failures with the legacy "failed to get + // files, reason: ..." prefix that scripts and the download.go shim + // have relied on since before the downloadclient abstraction. + files, err := c.ListFiles(ctx, req.DatasetID, ListFilesOptions{ + LegacyV1PubKey: req.PublicKeyBase64, + }) + if err != nil { + return DownloadResult{}, fmt.Errorf("failed to get files, reason: %v", err) + } + target, err := v1MatchFile(files, req.UserArg) + if err != nil { + return DownloadResult{}, err + } + + // The v1 /s3 server expects the user-prefix (e.g. "user_example.com/…") + // already stripped — legacy download.getFileIDURL ran AnonymizeFilepath + // on target.FilePath before building the URL. Preserve that behavior so + // datasets whose files carry a user prefix don't 404 on v1 download. + reqURL := c.cfg.BaseURL + "/s3/" + url.PathEscape(req.DatasetID) + "/" + helpers.AnonymizeFilepath(target.FilePath) + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + if err != nil { + return DownloadResult{}, err + } + httpReq.Header.Set("Authorization", "Bearer "+c.cfg.Token) + // SDA-Client-Version is emitted on every v1 call by legacy getBody. + if c.cfg.ClientVersion != "" { + httpReq.Header.Set("SDA-Client-Version", c.cfg.ClientVersion) + } + if req.PublicKeyBase64 != "" { + httpReq.Header.Set("Client-Public-Key", req.PublicKeyBase64) + } + + resp, err := c.http.Do(httpReq) // #nosec G704 + if err != nil { + return DownloadResult{}, err + } + // Partial-Content without a Range request is a server bug; treat as a + // non-success to avoid renaming a truncated .part as a complete file. + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(io.LimitReader(resp.Body, 201)) + _ = resp.Body.Close() + // Legacy getBody surfaces 412 bodies verbatim — some sda-download + // paths return actionable messages (e.g. token expired) in the body. + if resp.StatusCode == http.StatusPreconditionFailed { + return DownloadResult{}, errors.New(strings.TrimSpace(string(b))) + } + body := string(b) + if len(body) > 200 { + body = body[:200] + } + + return DownloadResult{}, fmt.Errorf("server returned status %d: %s", resp.StatusCode, body) + } + + return DownloadResult{File: target, Body: resp.Body, ContentLength: resp.ContentLength}, nil +} + +// v1MatchFile mirrors the substring-match logic from the retired +// download.getFileIDURL. Known to be imprecise; v2 uses an exact +// filePath filter instead. Semantics are unchanged from the legacy +// code — we just house it here now. +func v1MatchFile(files []File, userArg string) (File, error) { + if strings.Contains(userArg, "/") { + if !strings.HasSuffix(userArg, ".c4gh") { + userArg += ".c4gh" + } + for _, f := range files { + if strings.Contains(f.FilePath, userArg) { + return f, nil + } + } + } else { + for _, f := range files { + if strings.Contains(f.FileID, userArg) { + return f, nil + } + } + } + + return File{}, fmt.Errorf("file not found in dataset: %s", userArg) +} + // getBody is the v1 HTTP helper. Headers, error shape, and 412 handling // match the legacy download.getBody (download/download.go:483-514) verbatim. // diff --git a/downloadclient/v1_test.go b/downloadclient/v1_test.go index 920d7e89..553cfc0a 100644 --- a/downloadclient/v1_test.go +++ b/downloadclient/v1_test.go @@ -3,6 +3,7 @@ package downloadclient import ( "context" "fmt" + "io" "net/http" "net/http/httptest" "testing" @@ -146,3 +147,108 @@ func TestV1Client_DatasetInfo_NotSupported(t *testing.T) { _, err := c.DatasetInfo(context.Background(), "TES01") require.ErrorIs(t, err, ErrNotSupportedOnV1) } + +func TestV1Client_DownloadFile_ByPath(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/metadata/datasets/TES01/files": + fmt.Fprint(w, `[{"fileId":"f1","displayFileName":"a.c4gh","filePath":"dir/a.c4gh"}]`) + case "/s3/TES01/dir/a.c4gh": + assert.Equal(t, "key-b64", r.Header.Get("Client-Public-Key")) + w.Header().Set("Content-Length", "11") + fmt.Fprint(w, "encrypted..") + default: + t.Fatalf("unexpected path %q", r.URL.Path) + } + })) + defer ts.Close() + + c := NewV1Client(Config{BaseURL: ts.URL, Token: "t"}, nil) + c.SetHTTPClientForTest(ts.Client()) + + result, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "TES01", + UserArg: "dir/a.c4gh", + PublicKeyBase64: "key-b64", + }) + require.NoError(t, err) + defer result.Body.Close() + assert.Equal(t, int64(11), result.ContentLength) + b, _ := io.ReadAll(result.Body) + assert.Equal(t, "encrypted..", string(b)) +} + +func TestV1Client_DownloadFile_ByID(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/metadata/datasets/TES01/files": + fmt.Fprint(w, `[{"fileId":"f-xyz","displayFileName":"a.c4gh","filePath":"a.c4gh"}]`) + case "/s3/TES01/a.c4gh": + w.Header().Set("Content-Length", "5") + fmt.Fprint(w, "hello") + default: + t.Fatalf("unexpected path %q", r.URL.Path) + } + })) + defer ts.Close() + + c := NewV1Client(Config{BaseURL: ts.URL, Token: "t"}, nil) + c.SetHTTPClientForTest(ts.Client()) + + result, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "TES01", UserArg: "f-xyz", PublicKeyBase64: "key", + }) + require.NoError(t, err) + defer result.Body.Close() +} + +// TestV1Client_DownloadFile_StripsUserPrefix guards the v1 /s3 URL against +// a user-prefixed FilePath: the retired download.getFileIDURL ran +// AnonymizeFilepath before URL construction, and the v1 server expects the +// prefix already stripped. Regression here would 404 every v1 download for +// users whose dataset files carry a "user_/..." prefix. +func TestV1Client_DownloadFile_StripsUserPrefix(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/metadata/datasets/TES01/files": + fmt.Fprint(w, `[{"fileId":"f1","displayFileName":"a.c4gh","filePath":"user_example.com/files/a.c4gh"}]`) + case "/s3/TES01/files/a.c4gh": + fmt.Fprint(w, "ok") + default: + t.Fatalf("unexpected path %q — user prefix should have been stripped", r.URL.Path) + } + })) + defer ts.Close() + + c := NewV1Client(Config{BaseURL: ts.URL, Token: "t"}, nil) + c.SetHTTPClientForTest(ts.Client()) + + result, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "TES01", UserArg: "files/a.c4gh", + }) + require.NoError(t, err) + defer result.Body.Close() +} + +// TestV1Client_DownloadFile_WrapsListFailure guards the legacy +// "failed to get files, reason: ..." error prefix on list-resolution +// failures inside DownloadFile. Scripts and the download.go shim have +// asserted on this prefix since before the downloadclient abstraction; without +// the wrap, callers see the bare transport error and string-matching +// breaks. +func TestV1Client_DownloadFile_WrapsListFailure(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprint(w, "boom") + })) + defer ts.Close() + + c := NewV1Client(Config{BaseURL: ts.URL, Token: "t"}, nil) + c.SetHTTPClientForTest(ts.Client()) + + _, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "TES01", UserArg: "anything", PublicKeyBase64: "k", + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to get files, reason:") +} diff --git a/downloadclient/v2.go b/downloadclient/v2.go index fa23294c..6ffb0d3b 100644 --- a/downloadclient/v2.go +++ b/downloadclient/v2.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "net/url" + "strings" ) // maxErrorBodyBytes is the maximum number of bytes from a server error @@ -18,8 +19,6 @@ const maxErrorBodyBytes = 200 // V2Client talks to the v2 SDA download API // (GET /datasets, /datasets/{id}/files, /files/{id}, etc.). -// Methods fill in across #675, #676, #677. Until then, unimplemented -// methods return a clear "not implemented until #N" error. type V2Client struct { cfg Config http *http.Client @@ -113,6 +112,119 @@ func (c *V2Client) ListFiles(ctx context.Context, datasetID string, opts ListFil }) } +// DownloadFile implements Client. Resolves req.UserArg (either a file path +// or a fileId) via resolveFile, then follows the server-provided downloadURL +// with the X-C4GH-Public-Key header. Returns the resolved File so callers +// can use its canonical FilePath for the on-disk name (a userArg that was +// a bare fileId is not usable as a filename). 403 responses (from either +// the list resolution step or the download GET) are flattened to an +// ambiguous "does not exist or access denied" error to preserve the +// server's existence-leakage contract. Caller is responsible for closing +// the body. +func (c *V2Client) DownloadFile(ctx context.Context, req DownloadRequest) (DownloadResult, error) { + if req.PublicKeyBase64 == "" { + return DownloadResult{}, errors.New("v2 downloads require --pubkey (X-C4GH-Public-Key header)") + } + + target, err := c.resolveFile(ctx, req.DatasetID, req.UserArg) + if err != nil { + // Flatten 403 from the list-resolution step to preserve the server's + // existence-leakage contract — a forbidden dataset/file must look + // identical to a missing one. getJSON formats non-2xx errors as + // "server returned status N: ...", so substring-match on "status 403" + // until #678 replaces the error surface with typed Problem Details. + if strings.Contains(err.Error(), "status 403") { + return DownloadResult{}, fmt.Errorf("dataset/file does not exist or access denied: %s", req.UserArg) + } + + return DownloadResult{}, err + } + if target.FileID == "" { + return DownloadResult{}, fmt.Errorf("dataset/file does not exist or access denied: %s", req.UserArg) + } + if target.downloadURL == "" { + return DownloadResult{}, fmt.Errorf("server returned empty downloadUrl for %s", req.UserArg) + } + + // Resolve the server-provided downloadURL against BaseURL. + // The swagger specifies downloadUrl as a relative path (e.g. "/files/f1"), + // but we use ResolveReference rather than string concat to handle + // trailing/leading-slash mismatches cleanly. + base, err := url.Parse(c.cfg.BaseURL) + if err != nil { + return DownloadResult{}, fmt.Errorf("invalid base URL %q: %w", c.cfg.BaseURL, err) + } + ref, err := url.Parse(target.downloadURL) + if err != nil { + return DownloadResult{}, fmt.Errorf("server returned invalid downloadUrl %q: %w", target.downloadURL, err) + } + resolved := base.ResolveReference(ref) + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, resolved.String(), nil) + if err != nil { + return DownloadResult{}, err + } + httpReq.Header.Set("Authorization", "Bearer "+c.cfg.Token) + httpReq.Header.Set("X-C4GH-Public-Key", req.PublicKeyBase64) + if c.cfg.ClientVersion != "" { + httpReq.Header.Set("User-Agent", "sda-cli/"+c.cfg.ClientVersion) + httpReq.Header.Set("SDA-Client-Version", c.cfg.ClientVersion) + } + + resp, err := c.http.Do(httpReq) // #nosec G704 + if err != nil { + return DownloadResult{}, fmt.Errorf("http request: %w", err) + } + // Partial-Content without a Range request is a server bug; accepting it + // would rename a truncated .part as a complete file. + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes+1)) + _ = resp.Body.Close() + if resp.StatusCode == http.StatusForbidden { + return DownloadResult{}, fmt.Errorf("dataset/file does not exist or access denied: %s", req.UserArg) + } + body := string(b) + if len(body) > maxErrorBodyBytes { + body = body[:maxErrorBodyBytes] + } + + return DownloadResult{}, fmt.Errorf("server returned status %d: %s", resp.StatusCode, body) + } + + return DownloadResult{File: target, Body: resp.Body, ContentLength: resp.ContentLength}, nil +} + +// resolveFile converts UserArg (path or fileId) into an downloadclient.File so +// callers can use its downloadURL. Uses the exact filePath filter for paths; +// for bare ids, falls back to list + match (v2 has no exact-id filter). +// Returns a zero File (FileID == "") if not found. +func (c *V2Client) resolveFile(ctx context.Context, datasetID, userArg string) (File, error) { + isPath := strings.Contains(userArg, "/") || strings.HasSuffix(userArg, ".c4gh") + if isPath { + files, err := c.ListFiles(ctx, datasetID, ListFilesOptions{ExactPath: userArg}) + if err != nil { + return File{}, err + } + if len(files) == 0 { + return File{}, nil + } + + return files[0], nil + } + // Bare id: list + match. v2 has no exact-id filter. + files, err := c.ListFiles(ctx, datasetID, ListFilesOptions{}) + if err != nil { + return File{}, err + } + for _, f := range files { + if f.FileID == userArg { + return f, nil + } + } + + return File{}, nil +} + // DatasetInfo implements Client. Calls GET /datasets/{id} and returns the // v2-only dataset metadata (file count + total decrypted size). func (c *V2Client) DatasetInfo(ctx context.Context, datasetID string) (DatasetInfo, error) { diff --git a/downloadclient/v2_test.go b/downloadclient/v2_test.go index ba768739..f56c5c43 100644 --- a/downloadclient/v2_test.go +++ b/downloadclient/v2_test.go @@ -3,6 +3,7 @@ package downloadclient import ( "context" "fmt" + "io" "net/http" "net/http/httptest" "testing" @@ -11,6 +12,137 @@ import ( "github.com/stretchr/testify/require" ) +func TestV2Client_DownloadFile_ByPath(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/datasets/EGAD001/files": + assert.Equal(t, "a.c4gh", r.URL.Query().Get("filePath")) + w.Header().Set("Content-Type", "application/json") + fmt.Fprint(w, `{"files":[{"fileId":"f1","filePath":"a.c4gh","size":11,"decryptedSize":10,"checksums":[],"downloadUrl":"/files/f1"}],"nextPageToken":null}`) + case "/files/f1": + assert.Equal(t, "k-raw", r.Header.Get("X-C4GH-Public-Key")) + w.Header().Set("Content-Length", "11") + fmt.Fprint(w, "v2-bytes...") + default: + t.Fatalf("unexpected path %q", r.URL.Path) + } + })) + defer ts.Close() + + c := NewV2Client(Config{BaseURL: ts.URL, Token: "t"}) + c.http = ts.Client() + + result, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "EGAD001", UserArg: "a.c4gh", PublicKeyBase64: "k-raw", + }) + require.NoError(t, err) + defer result.Body.Close() + assert.Equal(t, int64(11), result.ContentLength) +} + +func TestV2Client_DownloadFile_NotFound403(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/datasets/EGAD001/files", r.URL.Path) + w.Header().Set("Content-Type", "application/json") + fmt.Fprint(w, `{"files":[],"nextPageToken":null}`) + })) + defer ts.Close() + + c := NewV2Client(Config{BaseURL: ts.URL, Token: "t"}) + c.http = ts.Client() + + _, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "EGAD001", UserArg: "missing.c4gh", PublicKeyBase64: "k", + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "dataset/file does not exist or access denied") +} + +// TestV2Client_DownloadFile_ByFileID covers the bare-fileId branch of +// resolveFile: no "/" in UserArg and no ".c4gh" suffix, so the client must +// list + scan by FileID rather than use the exact-path filter. +func TestV2Client_DownloadFile_ByFileID(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/datasets/EGAD001/files": + // No filePath filter must be set for the bare-id branch. + assert.Empty(t, r.URL.Query().Get("filePath")) + w.Header().Set("Content-Type", "application/json") + fmt.Fprint(w, `{"files":[{"fileId":"other","filePath":"other.c4gh","downloadUrl":"/files/other"},{"fileId":"f-xyz","filePath":"a.c4gh","downloadUrl":"/files/f-xyz"}],"nextPageToken":null}`) + case "/files/f-xyz": + w.Header().Set("Content-Length", "3") + fmt.Fprint(w, "xyz") + default: + t.Fatalf("unexpected path %q", r.URL.Path) + } + })) + defer ts.Close() + + c := NewV2Client(Config{BaseURL: ts.URL, Token: "t"}) + c.http = ts.Client() + + result, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "EGAD001", UserArg: "f-xyz", PublicKeyBase64: "k", + }) + require.NoError(t, err) + defer result.Body.Close() + got, _ := io.ReadAll(result.Body) + assert.Equal(t, "xyz", string(got)) +} + +// TestV2Client_DownloadFile_AbsoluteDownloadURL guards URL resolution: when +// the server returns an absolute downloadUrl (e.g. a pre-signed storage +// redirect), the client must hit that URL verbatim rather than concatenate +// it onto BaseURL and produce a broken "http://apihttp://storage/..." URL. +func TestV2Client_DownloadFile_AbsoluteDownloadURL(t *testing.T) { + storage := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Length", "7") + fmt.Fprint(w, "payload") + })) + defer storage.Close() + + api := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/datasets/EGAD001/files", r.URL.Path) + w.Header().Set("Content-Type", "application/json") + fmt.Fprintf(w, `{"files":[{"fileId":"f1","filePath":"a.c4gh","downloadUrl":"%s/storage/signed"}],"nextPageToken":null}`, storage.URL) + })) + defer api.Close() + + c := NewV2Client(Config{BaseURL: api.URL, Token: "t"}) + c.http = api.Client() + + result, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "EGAD001", UserArg: "a.c4gh", PublicKeyBase64: "k", + }) + require.NoError(t, err) + defer result.Body.Close() + got, _ := io.ReadAll(result.Body) + assert.Equal(t, "payload", string(got)) +} + +// TestV2Client_DownloadFile_ListForbidden403 guards the existence-leakage +// contract for the list-resolution step: a 403 from GET /datasets/{id}/files +// must be flattened to the same ambiguous message as a 200-with-empty-list +// so that a forbidden dataset is indistinguishable from a missing one. +func TestV2Client_DownloadFile_ListForbidden403(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/datasets/EGAD001/files", r.URL.Path) + w.WriteHeader(http.StatusForbidden) + fmt.Fprint(w, `{"title":"Forbidden","status":403,"detail":"access denied"}`) + })) + defer ts.Close() + + c := NewV2Client(Config{BaseURL: ts.URL, Token: "t"}) + c.http = ts.Client() + + _, err := c.DownloadFile(context.Background(), DownloadRequest{ + DatasetID: "EGAD001", UserArg: "a.c4gh", PublicKeyBase64: "k", + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "dataset/file does not exist or access denied") + assert.NotContains(t, err.Error(), "403", "403 status must not leak through") +} + func TestV2Client_ListDatasets_SinglePage(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, "/datasets", r.URL.Path) @@ -82,8 +214,6 @@ func TestV2Client_ListFiles_Paginated(t *testing.T) { assert.Equal(t, "f1", got[0].FileID) assert.Equal(t, "a.c4gh", got[0].FilePath) assert.Equal(t, int64(90), got[0].DecryptedFileSize) - assert.Equal(t, "aaaa", got[0].DecryptedFileChecksum) - assert.Equal(t, "sha256", got[0].DecryptedFileChecksumType) } func TestV2Client_ListFiles_ExactFilePath(t *testing.T) { diff --git a/downloadclient/v2_types.go b/downloadclient/v2_types.go index 4307555a..a9c05aec 100644 --- a/downloadclient/v2_types.go +++ b/downloadclient/v2_types.go @@ -1,11 +1,6 @@ package downloadclient -// v2File is the v2 server's FileInfo wire shape. Differs from downloadclient.File: -// - Scalar DecryptedFileSize → DecryptedSize (int64 here vs int in v1) -// - DecryptedFileChecksum/Type → Checksums array -// - New DownloadURL (server-provided; clients must not construct /files/{id}) -// - Drops v1-only fields (DisplayFileName, etc.) -// +// v2File is the v2 server's FileInfo wire shape. // Conversion to the shared File type happens at the V2Client boundary. type v2File struct { FileID string `json:"fileId"` @@ -43,28 +38,11 @@ type datasetInfoResponse struct { Size int64 `json:"size"` } -// toFile converts a v2File into the shared downloadclient.File. -// Maps the Checksums array to the legacy scalar fields: -// prefer sha256 if present, else first entry. -// DisplayFileName is not populated (v2 doesn't return it). func (f v2File) toFile() File { - out := File{ + return File{ FileID: f.FileID, FilePath: f.FilePath, DecryptedFileSize: f.DecryptedSize, + downloadURL: f.DownloadURL, } - for _, c := range f.Checksums { - if c.Type == "sha256" { - out.DecryptedFileChecksum = c.Checksum - out.DecryptedFileChecksumType = c.Type - - return out - } - } - if len(f.Checksums) > 0 { - out.DecryptedFileChecksum = f.Checksums[0].Checksum - out.DecryptedFileChecksumType = f.Checksums[0].Type - } - - return out }