diff --git a/clientutil/parse.go b/clientutil/parse.go index 759c42d2..af4ae847 100644 --- a/clientutil/parse.go +++ b/clientutil/parse.go @@ -4,8 +4,10 @@ package clientutil import ( + "encoding/hex" "net/http" "strconv" + "strings" "time" "github.com/pkg/errors" @@ -63,3 +65,16 @@ func ParseLastModified(m http.Header, f string) (time.Time, error) { return mod, nil } + +// ParseMD5 returns the bytes parsed from the hex-encoded MD5 string. +// It trims potential surrounding double quotes before decoding. +// It returns nil if the MD5 string is not valid. +func ParseMD5(md5Hex string) []byte { + // Trim surrounding double quotes if present. + trimmed := strings.Trim(md5Hex, "\"") + decoded, _ := hex.DecodeString(trimmed) + if len(decoded) != 16 { + return nil + } + return decoded +} diff --git a/clientutil/parse_test.go b/clientutil/parse_test.go index 195c66d5..2ae90e68 100644 --- a/clientutil/parse_test.go +++ b/clientutil/parse_test.go @@ -4,6 +4,8 @@ package clientutil import ( + "bytes" + "fmt" "net/http" "testing" "time" @@ -108,3 +110,49 @@ func TestParseContentLength(t *testing.T) { }) } } + +func TestParseMD5(t *testing.T) { + for _, tc := range []struct { + label string + in string + out []byte + }{ + { + label: "valid md5", + in: "b1946ac92492d2347c6235b4d2611184", + out: []byte{0xb1, 0x94, 0x6a, 0xc9, 0x24, 0x92, 0xd2, 0x34, 0x7c, 0x62, 0x35, 0xb4, 0xd2, 0x61, 0x11, 0x84}, + }, + { + label: "invalid hex string", + in: "not-a-hex-string", + }, + { + label: "odd length hex string", + in: "abc", + }, + { + label: "empty string", + in: "", + out: []byte{}, + }, + { + label: "valid md5 with quotes", + in: "\"b1946ac92492d2347c6235b4d2611184\"", + out: []byte{0xb1, 0x94, 0x6a, 0xc9, 0x24, 0x92, 0xd2, 0x34, 0x7c, 0x62, 0x35, 0xb4, 0xd2, 0x61, 0x11, 0x84}, + }, + { + label: "invalid hex string with quotes", + in: "\"not-a-hex-string\"", + }, + { + label: "only quotes", + in: "\"\"", + out: []byte{}, + }, + } { + t.Run(tc.label, func(t *testing.T) { + out := 
ParseMD5(tc.in) + testutil.Assert(t, bytes.Equal(out, tc.out), fmt.Sprintf("output mismatch: %v != %v", out, tc.out)) + }) + } +} diff --git a/objstore.go b/objstore.go index bdbb52a3..9b2af517 100644 --- a/objstore.go +++ b/objstore.go @@ -280,6 +280,9 @@ type ObjectAttributes struct { // LastModified is the timestamp the object was last modified. LastModified time.Time `json:"last_modified"` + + // MD5 is the MD5 hash of the object, if available. + MD5 []byte `json:"md5,omitempty"` } type IterObjectAttributes struct { diff --git a/providers/azure/azure.go b/providers/azure/azure.go index f3b891ec..7377f120 100644 --- a/providers/azure/azure.go +++ b/providers/azure/azure.go @@ -348,6 +348,7 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt return objstore.ObjectAttributes{ Size: *resp.ContentLength, LastModified: *resp.LastModified, + MD5: resp.ContentMD5, }, nil } diff --git a/providers/bos/bos.go b/providers/bos/bos.go index be7a1b3c..3a711589 100644 --- a/providers/bos/bos.go +++ b/providers/bos/bos.go @@ -23,6 +23,7 @@ import ( "gopkg.in/yaml.v2" "github.com/thanos-io/objstore" + "github.com/thanos-io/objstore/clientutil" ) // partSize 128MB. 
@@ -313,6 +314,7 @@ func (b *Bucket) Attributes(_ context.Context, name string) (objstore.ObjectAttr return objstore.ObjectAttributes{ Size: objMeta.ContentLength, LastModified: lastModified, + MD5: clientutil.ParseMD5(objMeta.ContentMD5), }, nil } diff --git a/providers/cos/cos.go b/providers/cos/cos.go index 9ac96bfd..4c894f8c 100644 --- a/providers/cos/cos.go +++ b/providers/cos/cos.go @@ -185,6 +185,8 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt return objstore.ObjectAttributes{}, err } + // cos does not return md5 and etag is not necessarily md5 of the object content + // https://www.tencentcloud.com/document/product/436/7729 return objstore.ObjectAttributes{ Size: size, LastModified: mod, diff --git a/providers/filesystem/filesystem.go b/providers/filesystem/filesystem.go index 3717cf3d..a6a815af 100644 --- a/providers/filesystem/filesystem.go +++ b/providers/filesystem/filesystem.go @@ -5,6 +5,7 @@ package filesystem import ( "context" + "crypto/md5" "fmt" "io" "os" @@ -171,9 +172,21 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt return objstore.ObjectAttributes{}, errors.Wrapf(err, "stat %s", file) } + f, err := os.Open(filepath.Clean(file)) + if err != nil { + return objstore.ObjectAttributes{}, errors.Wrapf(err, "open file %s for md5 calculation", file) + } + defer f.Close() + + h := md5.New() + if _, err := io.Copy(h, f); err != nil { + return objstore.ObjectAttributes{}, errors.Wrapf(err, "copy file content for md5 calculation %s", file) + } + return objstore.ObjectAttributes{ Size: stat.Size(), LastModified: stat.ModTime(), + MD5: h.Sum(nil), }, nil } diff --git a/providers/gcs/gcs.go b/providers/gcs/gcs.go index 484e33a9..9e80f842 100644 --- a/providers/gcs/gcs.go +++ b/providers/gcs/gcs.go @@ -312,6 +312,7 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt return objstore.ObjectAttributes{ Size: attrs.Size, LastModified: attrs.Updated, + MD5: 
attrs.MD5, }, nil } diff --git a/providers/obs/obs.go b/providers/obs/obs.go index b6036fed..3c0213f8 100644 --- a/providers/obs/obs.go +++ b/providers/obs/obs.go @@ -5,6 +5,7 @@ package obs import ( "context" + "encoding/base64" "io" "math" "os" @@ -373,9 +374,15 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt if err != nil { return objstore.ObjectAttributes{}, errors.Wrap(err, "failed to get object metadata") } + + // obs etag is base64 md5 of the object content for unencrypted objects + // https://support.huaweicloud.com/intl/en-us/sdk-go-devg-obs/obs_33_0519.html#section4 + md5, _ := base64.StdEncoding.DecodeString(output.ETag) + return objstore.ObjectAttributes{ Size: output.ContentLength, LastModified: output.LastModified, + MD5: md5, }, nil } diff --git a/providers/oci/oci.go b/providers/oci/oci.go index edea003d..d3541844 100644 --- a/providers/oci/oci.go +++ b/providers/oci/oci.go @@ -24,6 +24,7 @@ import ( "gopkg.in/yaml.v2" "github.com/thanos-io/objstore" + "github.com/thanos-io/objstore/clientutil" ) // DirDelim is the delimiter used to model a directory structure in an object store bucket. 
@@ -289,9 +290,16 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt if err != nil { return objstore.ObjectAttributes{}, err } + + var md5 []byte + if response.ContentMd5 != nil { + md5 = clientutil.ParseMD5(*response.ContentMd5) + } + return objstore.ObjectAttributes{ Size: *response.ContentLength, LastModified: response.LastModified.Time, + MD5: md5, }, nil } diff --git a/providers/oss/oss.go b/providers/oss/oss.go index 4941a451..21606116 100644 --- a/providers/oss/oss.go +++ b/providers/oss/oss.go @@ -156,9 +156,17 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt return objstore.ObjectAttributes{}, err } + // etag is md5 of the object, if an object is created by calling the PutObject operation + // https://www.alibabacloud.com/help/en/oss/developer-reference/getobjectmeta + var md5 []byte + if etag := m.Get("ETag"); etag != "" { + md5 = clientutil.ParseMD5(etag) + } + return objstore.ObjectAttributes{ Size: size, LastModified: mod, + MD5: md5, }, nil } diff --git a/providers/s3/s3.go b/providers/s3/s3.go index 5c8cd953..f5c1f301 100644 --- a/providers/s3/s3.go +++ b/providers/s3/s3.go @@ -26,6 +26,7 @@ import ( "gopkg.in/yaml.v2" "github.com/thanos-io/objstore" + "github.com/thanos-io/objstore/clientutil" "github.com/thanos-io/objstore/exthttp" ) @@ -601,6 +602,7 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt return objstore.ObjectAttributes{ Size: objInfo.Size, LastModified: objInfo.LastModified, + MD5: clientutil.ParseMD5(objInfo.ETag), }, nil } diff --git a/providers/swift/swift.go b/providers/swift/swift.go index c5263b25..ec0cd8aa 100644 --- a/providers/swift/swift.go +++ b/providers/swift/swift.go @@ -23,6 +23,7 @@ import ( "github.com/prometheus/common/model" "github.com/thanos-io/objstore" + "github.com/thanos-io/objstore/clientutil" "github.com/thanos-io/objstore/exthttp" "gopkg.in/yaml.v2" ) @@ -310,9 +311,11 @@ func (c *Container) Attributes(_ 
context.Context, name string) (objstore.ObjectA if err != nil { return objstore.ObjectAttributes{}, errors.Wrap(err, "get object attributes") } + return objstore.ObjectAttributes{ Size: info.Bytes, LastModified: info.LastModified, + MD5: clientutil.ParseMD5(info.Hash), }, nil }