Skip to content

Commit 6bfd945

Browse files
committed
Added md5 attribute
Signed-off-by: Sebastian Rabenhorst <[email protected]> fmt Signed-off-by: Sebastian Rabenhorst <[email protected]> Simpler md5 parsing Signed-off-by: Sebastian Rabenhorst <[email protected]> Simpler md5 parsing Signed-off-by: Sebastian Rabenhorst <[email protected]>
1 parent a0136a6 commit 6bfd945

File tree

13 files changed

+113
-0
lines changed

13 files changed

+113
-0
lines changed

clientutil/parse.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
package clientutil
55

66
import (
7+
"encoding/hex"
78
"net/http"
89
"strconv"
10+
"strings"
911
"time"
1012

1113
"github.com/pkg/errors"
@@ -63,3 +65,16 @@ func ParseLastModified(m http.Header, f string) (time.Time, error) {
6365

6466
return mod, nil
6567
}
68+
69+
// ParseMD5 decodes a hex-encoded MD5 digest into its 16 raw bytes.
// Any leading or trailing double quotes are trimmed before decoding.
// It returns nil if the remaining string is not exactly one valid
// hex-encoded MD5 digest, i.e. it contains a non-hex character, has an odd
// length, or does not decode to exactly 16 bytes.
func ParseMD5(md5Hex string) []byte {
	// Length in bytes of a decoded MD5 digest (32 hex characters).
	const md5Size = 16

	// Trim surrounding double quotes if present.
	trimmed := strings.Trim(md5Hex, "\"")

	// hex.DecodeString returns the bytes decoded before the error when the
	// input is malformed, so a valid digest followed by garbage would still
	// yield 16 bytes. The error must be checked in addition to the length.
	decoded, err := hex.DecodeString(trimmed)
	if err != nil || len(decoded) != md5Size {
		return nil
	}
	return decoded
}

clientutil/parse_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
package clientutil
55

66
import (
7+
"bytes"
8+
"fmt"
79
"net/http"
810
"testing"
911
"time"
@@ -108,3 +110,49 @@ func TestParseContentLength(t *testing.T) {
108110
})
109111
}
110112
}
113+
114+
func TestParseMD5(t *testing.T) {
115+
for _, tc := range []struct {
116+
label string
117+
in string
118+
out []byte
119+
}{
120+
{
121+
label: "valid md5",
122+
in: "b1946ac92492d2347c6235b4d2611184",
123+
out: []byte{0xb1, 0x94, 0x6a, 0xc9, 0x24, 0x92, 0xd2, 0x34, 0x7c, 0x62, 0x35, 0xb4, 0xd2, 0x61, 0x11, 0x84},
124+
},
125+
{
126+
label: "invalid hex string",
127+
in: "not-a-hex-string",
128+
},
129+
{
130+
label: "odd length hex string",
131+
in: "abc",
132+
},
133+
{
134+
label: "empty string",
135+
in: "",
136+
out: []byte{},
137+
},
138+
{
139+
label: "valid md5 with quotes",
140+
in: "\"b1946ac92492d2347c6235b4d2611184\"",
141+
out: []byte{0xb1, 0x94, 0x6a, 0xc9, 0x24, 0x92, 0xd2, 0x34, 0x7c, 0x62, 0x35, 0xb4, 0xd2, 0x61, 0x11, 0x84},
142+
},
143+
{
144+
label: "invalid hex string with quotes",
145+
in: "\"not-a-hex-string\"",
146+
},
147+
{
148+
label: "only quotes",
149+
in: "\"\"",
150+
out: []byte{},
151+
},
152+
} {
153+
t.Run(tc.label, func(t *testing.T) {
154+
out := ParseMD5(tc.in)
155+
testutil.Assert(t, bytes.Equal(out, tc.out), fmt.Sprintf("output mismatch: %v != %v", out, tc.out))
156+
})
157+
}
158+
}

objstore.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,9 @@ type ObjectAttributes struct {
280280

281281
// LastModified is the timestamp the object was last modified.
282282
LastModified time.Time `json:"last_modified"`
283+
284+
// MD5 is the MD5 hash of the object, if available.
285+
MD5 []byte `json:"md5,omitempty"`
283286
}
284287

285288
type IterObjectAttributes struct {

providers/azure/azure.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt
348348
return objstore.ObjectAttributes{
349349
Size: *resp.ContentLength,
350350
LastModified: *resp.LastModified,
351+
MD5: resp.ContentMD5,
351352
}, nil
352353
}
353354

providers/bos/bos.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"gopkg.in/yaml.v2"
2424

2525
"github.com/thanos-io/objstore"
26+
"github.com/thanos-io/objstore/clientutil"
2627
)
2728

2829
// partSize 128MB.
@@ -313,6 +314,7 @@ func (b *Bucket) Attributes(_ context.Context, name string) (objstore.ObjectAttr
313314
return objstore.ObjectAttributes{
314315
Size: objMeta.ContentLength,
315316
LastModified: lastModified,
317+
MD5: clientutil.ParseMD5(objMeta.ContentMD5),
316318
}, nil
317319
}
318320

providers/cos/cos.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt
185185
return objstore.ObjectAttributes{}, err
186186
}
187187

188+
// cos does not return md5 and etag is not necessarily md5 of the object content
189+
// https://www.tencentcloud.com/document/product/436/7729
188190
return objstore.ObjectAttributes{
189191
Size: size,
190192
LastModified: mod,

providers/filesystem/filesystem.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package filesystem
55

66
import (
77
"context"
8+
"crypto/md5"
89
"fmt"
910
"io"
1011
"os"
@@ -171,9 +172,21 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt
171172
return objstore.ObjectAttributes{}, errors.Wrapf(err, "stat %s", file)
172173
}
173174

175+
f, err := os.Open(filepath.Clean(file))
176+
if err != nil {
177+
return objstore.ObjectAttributes{}, errors.Wrapf(err, "open file %s for md5 calculation", file)
178+
}
179+
defer f.Close()
180+
181+
h := md5.New()
182+
if _, err := io.Copy(h, f); err != nil {
183+
return objstore.ObjectAttributes{}, errors.Wrapf(err, "copy file content for md5 calculation %s", file)
184+
}
185+
174186
return objstore.ObjectAttributes{
175187
Size: stat.Size(),
176188
LastModified: stat.ModTime(),
189+
MD5: h.Sum(nil),
177190
}, nil
178191
}
179192

providers/gcs/gcs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt
312312
return objstore.ObjectAttributes{
313313
Size: attrs.Size,
314314
LastModified: attrs.Updated,
315+
MD5: attrs.MD5,
315316
}, nil
316317
}
317318

providers/obs/obs.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package obs
55

66
import (
77
"context"
8+
"encoding/base64"
89
"io"
910
"math"
1011
"os"
@@ -373,9 +374,15 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt
373374
if err != nil {
374375
return objstore.ObjectAttributes{}, errors.Wrap(err, "failed to get object metadata")
375376
}
377+
378+
// obs etag is base64 md5 of the object content for unencrypted objects
379+
// https://support.huaweicloud.com/intl/en-us/sdk-go-devg-obs/obs_33_0519.html#section4
380+
md5, _ := base64.StdEncoding.DecodeString(output.ETag)
381+
376382
return objstore.ObjectAttributes{
377383
Size: output.ContentLength,
378384
LastModified: output.LastModified,
385+
MD5: md5,
379386
}, nil
380387
}
381388

providers/oci/oci.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"gopkg.in/yaml.v2"
2525

2626
"github.com/thanos-io/objstore"
27+
"github.com/thanos-io/objstore/clientutil"
2728
)
2829

2930
// DirDelim is the delimiter used to model a directory structure in an object store bucket.
@@ -289,9 +290,16 @@ func (b *Bucket) Attributes(ctx context.Context, name string) (objstore.ObjectAt
289290
if err != nil {
290291
return objstore.ObjectAttributes{}, err
291292
}
293+
294+
var md5 []byte
295+
if response.ContentMd5 != nil {
296+
md5 = clientutil.ParseMD5(*response.ContentMd5)
297+
}
298+
292299
return objstore.ObjectAttributes{
293300
Size: *response.ContentLength,
294301
LastModified: response.LastModified.Time,
302+
MD5: md5,
295303
}, nil
296304
}
297305

0 commit comments

Comments
 (0)