Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ authors = ["Fabian Gans <[email protected]>"]
version = "0.9.6"

[deps]
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
Blosc = "a74b3585-a348-5f62-a45c-50e91977d574"
ChunkCodecCore = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1"
ChunkCodecLibZlib = "4c0bbee4-addc-4d73-81a0-b6caacae83c8"
Expand All @@ -21,6 +20,12 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c"

[weakdeps]
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"

[extensions]
ZarrAWSS3Ext = "AWSS3"

[compat]
AWSS3 = "0.10, 0.11"
Blosc = "0.5, 0.6, 0.7"
Expand Down
1 change: 1 addition & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[deps]
AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc"
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Expand Down
4 changes: 2 additions & 2 deletions docs/src/s3examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ AWS configuration first, for options look at the documentation of
account, you can access the dataset without credentials as follows:

````@example aws
using Zarr, AWS
Zarr.AWSS3.AWS.global_aws_config(Zarr.AWSS3.AWS.AWSConfig(creds=nothing, region="us-west-2"))
using Zarr, AWSS3
AWSS3.AWS.global_aws_config(AWSS3.AWS.AWSConfig(creds=nothing, region="us-west-2"))
````

Then we can directly open a zarr group stored on s3
Expand Down
16 changes: 16 additions & 0 deletions ext/ZarrAWSS3Ext.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Package extension that provides the AWSS3-backed implementation of `Zarr.S3Store`.
# It is declared under `[extensions]` in Project.toml with AWSS3 as its trigger package.
module ZarrAWSS3Ext

# AWSS3 entry points used by the store implementation.
using AWSS3: AWSS3, s3_delete, s3_exists, s3_get, s3_list_objects, s3_put

# Names from Zarr that the store code refers to without qualification.
import Zarr
import Zarr: AbstractStore, ConcurrentRead, S3Store, cloud_list_objects,
    concurrent_io_tasks, storageregexlist

# The store methods themselves live in a separate file next to this one.
include("s3store.jl")

end # module ZarrAWSS3Ext
26 changes: 9 additions & 17 deletions src/Storage/s3store.jl → ext/s3store.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
using AWSS3: AWSS3, s3_put, s3_get, s3_delete, s3_list_objects, s3_exists

struct S3Store <: AbstractStore
bucket::String
aws::AWSS3.AWS.AbstractAWSConfig
end


function S3Store(bucket::String;
function Zarr.S3Store(bucket::String;
aws = nothing,
)
if aws === nothing
Expand Down Expand Up @@ -35,7 +27,7 @@ end

# Delete the object stored under key `d` from the bucket of `s`, using the store's
# AWS configuration. Returns whatever `s3_delete` returns.
function Base.delete!(s::S3Store, d::String)
    return s3_delete(s.aws, s.bucket, d)
end

function storagesize(s::S3Store,p)
function Zarr.storagesize(s::S3Store,p)
prefix = (isempty(p) || endswith(p,"/")) ? p : string(p,"/")
r = s3_list_objects(s.aws,s.bucket,prefix)
s = 0
Expand All @@ -48,21 +40,21 @@ function storagesize(s::S3Store,p)
s
end

function isinitialized(s::S3Store, i::String)
function Zarr.isinitialized(s::S3Store, i::String)
s3_exists(s.aws,s.bucket,i)
end


function cloud_list_objects(s::S3Store,p)
function Zarr.cloud_list_objects(s::S3Store,p)
prefix = (isempty(p) || endswith(p,"/")) ? p : string(p,"/")
AWSS3.S3.list_objects_v2(s.bucket, Dict("prefix"=>prefix, "delimiter" => "/"), aws_config = s.aws)
end
function subdirs(s::S3Store, p)
function Zarr.subdirs(s::S3Store, p)
s3_resp = cloud_list_objects(s, p)
!haskey(s3_resp,"CommonPrefixes") && return String[]
allstrings(s3_resp["CommonPrefixes"],"Prefix")
end
function subkeys(s::S3Store, p)
function Zarr.subkeys(s::S3Store, p)
s3_resp = cloud_list_objects(s, p)
!haskey(s3_resp,"Contents") && return String[]
r = allstrings(s3_resp["Contents"],"Key")
Expand All @@ -71,13 +63,13 @@ end
# Array case: look up `prefixkey` in every element of `v` and return the values as
# strings with any trailing '/' removed.
function allstrings(v::AbstractArray, prefixkey)
    return map(v) do entry
        name = String(entry[prefixkey])
        rstrip(name, '/')
    end
end

# Single-entry case: `v` is one keyed object rather than an array of them; the result
# is still a one-element vector so callers always get a collection back.
function allstrings(v, prefixkey)
    name = String(v[prefixkey])
    return [rstrip(name, '/')]
end

push!(storageregexlist,r"^s3://"=>S3Store)
# The `s3://` pattern is registered in Zarr's src/Storage/Storage.jl rather than here.

function storefromstring(::Type{<:S3Store}, s, _)
function Zarr.storefromstring(::Type{<:S3Store}, s, _)
decomp = split(s,"/",keepempty=false)
bucket = decomp[2]
path = join(decomp[3:end],"/")
S3Store(String(bucket),aws=AWSS3.AWS.global_aws_config()),path
end

store_read_strategy(::S3Store) = ConcurrentRead(concurrent_io_tasks[])
Zarr.store_read_strategy(::S3Store) = ConcurrentRead(concurrent_io_tasks[])
19 changes: 17 additions & 2 deletions src/Storage/Storage.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,22 @@ They may optionally implement the following methods:
"""
abstract type AbstractStore end

#Define the interface
# Define the interface

"""
    S3Store(bucket::String; aws=nothing)

An S3-backed Zarr store.

The type itself is always defined by Zarr, but the keyword constructor shown above and
the storage methods are provided by the `ZarrAWSS3Ext` extension, i.e. they become
available once `AWSS3` is loaded. Without it, constructing an `S3Store` through the
fallback constructor raises an error asking you to load `AWSS3`.

# Fields
- `bucket`: name of the S3 bucket, passed on to the AWSS3 calls made by the extension.
- `aws`: AWS configuration object passed on to the AWSS3 calls. It is left as `Any`
  here; presumably because the AWSS3 config types are not available in this module
  now that AWSS3 is only a weak dependency.
"""
struct S3Store <: AbstractStore
# Bucket that holds the Zarr data.
bucket::String
# AWS configuration used for requests (see the field notes in the docstring above).
aws::Any
end

# Fallback constructor that is hit when no more specific `S3Store` method applies,
# which is the case for the usual `S3Store(bucket; aws=...)` call while AWSS3 has not
# been loaded. It always throws an `ErrorException` pointing the user at `using AWSS3`.
#
# Keyword arguments are accepted (and ignored) on purpose: without `kwargs...` a call
# such as `S3Store("bucket"; aws=cfg)` would fail with a `MethodError` about an
# unsupported keyword argument instead of this hint.
function S3Store(args...; kwargs...)
    error("AWSS3 must be loaded to use S3Store. Try `using AWSS3`.")
end

"""
storagesize(d::AbstractStore, p::AbstractString)

Expand Down Expand Up @@ -196,10 +211,10 @@ isemptysub(s::AbstractStore, p) = isempty(subkeys(s,p)) && isempty(subdirs(s,p))
# Registry of `regex => store type` pairs. Storage backends register a regex here, and
# the entries are checked against the given path during the auto-check of the storage
# format when doing `zopen`.
# The binding is `const` because the list is only ever mutated in place with `push!`;
# this keeps accesses to the global type-stable.
const storageregexlist = Pair[]
# The `s3://` scheme is registered unconditionally, even though the S3 backend itself
# lives in the AWSS3 extension.
# NOTE(review): `storefromstring` for `S3Store` appears to be defined only in the
# extension - confirm that opening an `s3://` URL without AWSS3 loaded produces a
# helpful error rather than a bare `MethodError`.
push!(storageregexlist, r"^s3://" => S3Store)

include("directorystore.jl")
include("dictstore.jl")
include("s3store.jl")
include("gcstore.jl")
include("consolidated.jl")
include("http.jl")
Expand Down
3 changes: 2 additions & 1 deletion test/Filters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ using Zarr: Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter, QuantizeFil

data = rand(100)
enc = zencode(data, Fletcher32Filter())
enc[begin] += 1
# Corrupt the checksum by modifying a byte (handle overflow safely)
enc[begin] = UInt8((enc[begin] + 1) % 256)
@test_throws "Checksum mismatch in Fletcher32 decoding" zdecode(enc, Fletcher32Filter())
end

Expand Down
3 changes: 2 additions & 1 deletion test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[deps]
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Expand All @@ -12,4 +13,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"

[sources]
Zarr = {path = ".."}
Zarr = {path = ".."}
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ using JSON: json
using Pkg
using Dates

@testset "Zarr" begin

@testset "Zarr" begin


@testset "ZArray" begin
@testset "fields" begin
z = zzeros(Int64, 2, 3)
Expand Down
12 changes: 9 additions & 3 deletions test/storage.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Without AWSS3 loaded, constructing an S3Store must fail with an ErrorException
# (the fallback constructor in Zarr asks the user to load AWSS3).
# NOTE(review): this relies on running before the `using AWSS3` that follows this
# testset - confirm nothing loaded earlier in the test suite already brings AWSS3 in.
@testset "Zarr error" begin
@test_throws ErrorException S3Store("test")
end

using AWSS3

@testset "Path Normalization" begin
mixed_path = ".\\\\path///to\\a\\place/..\\///"
norm_path = "path/to/a"
Expand Down Expand Up @@ -159,8 +165,8 @@ end
s = Minio.Server(joinpath("./",tempname()), address="localhost:9001")
run(s, wait=false)
cfg = MinioConfig("http://localhost:9001")
Zarr.AWSS3.global_aws_config(cfg)
Zarr.AWSS3.S3.create_bucket("zarrdata")
AWSS3.global_aws_config(cfg)
AWSS3.S3.create_bucket("zarrdata")
ds = S3Store("zarrdata")
test_store_common(ds)
@test sprint(show, ds) == "S3 Object Storage"
Expand All @@ -171,7 +177,7 @@ end
end

@testset "AWS S3 Storage" begin
Zarr.AWSS3.AWS.global_aws_config(Zarr.AWSS3.AWS.AWSConfig(creds=nothing, region="us-west-2"))
AWSS3.AWS.global_aws_config(AWSS3.AWS.AWSConfig(creds=nothing, region="us-west-2"))
S3, p = Zarr.storefromstring("s3://mur-sst/zarr-v1")
@test Zarr.is_zgroup(S3, p)
@test storagesize(S3, p) == 10551
Expand Down
Loading