Skip to content

Commit 14b8511

Browse files
authored
Merge pull request #65 from JuliaIO/sf/chaos_buffer_test
2 parents 304c8bd + 1064f35 commit 14b8511

File tree

4 files changed

+123
-21
lines changed

4 files changed

+123
-21
lines changed

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
1717
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1818
Tar_jll = "9b64493d-8859-5bf3-93d7-7c32dd38186f"
1919
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
20+
SimpleBufferStream = "777ac1f9-54b0-4bf8-805c-2214025038e7"
2021

2122
[targets]
22-
test = ["Pkg", "Random", "Tar_jll", "Test"]
23+
test = ["Pkg", "Random", "Tar_jll", "Test", "SimpleBufferStream"]

src/extract.jl

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -250,17 +250,17 @@ function git_file_hash(
250250
# TODO: this largely duplicates the logic of read_data
251251
# read_data could be used directly if SHA offered an interface
252252
# where you write data to an IO object and it maintains a hash
253-
t = round_up(size)
254-
while size > 0
255-
n = min(t, length(buf))
256-
r = readbytes!(tar, buf, n)
257-
r < n && eof(tar) && throw(EOFError())
258-
v = view(buf, 1:min(r, size))
259-
SHA.update!(ctx, v)
260-
size -= length(v)
261-
t -= r
253+
padded_size = round_up(size)
254+
while padded_size > 0
255+
max_read_len = min(padded_size, length(buf))
256+
read_len = readbytes!(tar, buf, max_read_len)
257+
read_len < max_read_len && eof(tar) && throw(EOFError())
258+
nonpadded_view = view(buf, 1:min(read_len, size))
259+
SHA.update!(ctx, nonpadded_view)
260+
size -= length(nonpadded_view)
261+
padded_size -= read_len
262262
end
263-
@assert size == t == 0
263+
@assert size == padded_size == 0
264264
return bytes2hex(SHA.digest!(ctx))
265265
end
266266

@@ -516,16 +516,16 @@ function read_data(
516516
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
517517
tee::IO = devnull,
518518
)::Nothing
519-
t = round_up(size)
520-
while size > 0
521-
n = min(t, length(buf))
522-
r = readbytes!(tar, buf, n)
523-
write(tee, view(buf, 1:r))
524-
r < n && eof(io) && error("premature end of tar file")
525-
size -= write(file, view(buf, 1:min(r, size)))
526-
t -= r
519+
padded_size = round_up(size)
520+
while padded_size > 0
521+
max_read_len = min(padded_size, length(buf))
522+
read_len = readbytes!(tar, buf, max_read_len)
523+
write(tee, view(buf, 1:read_len))
524+
read_len < max_read_len && eof(tar) && error("premature end of tar file")
525+
size -= write(file, view(buf, 1:min(read_len, size)))
526+
padded_size -= read_len
527527
end
528-
@assert size == t == 0
528+
@assert size == padded_size == 0
529529
return
530530
end
531531

test/runtests.jl

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,43 @@
11
include("setup.jl")
22

3+
@testset "ChaosBufferStream" begin
4+
@testset "constant usage" begin
5+
io = BufferStream()
6+
cio = ChaosBufferStream(io; chunksizes=[17], sleepamnts=[0.001])
7+
write(io, rand(UInt8, 30))
8+
close(io)
9+
10+
# Test that data comes out in 17-byte chunks (except for the last)
11+
buff = Array{UInt8}(undef, 30)
12+
t = @elapsed begin
13+
@test readbytes!(cio, buff, 30) == 17
14+
@test readbytes!(cio, buff, 30) == 13
15+
end
16+
@test t >= 0.001
17+
end
18+
19+
@testset "random usage" begin
20+
io = BufferStream()
21+
chunksizes = 5:10
22+
cio = ChaosBufferStream(io; chunksizes=chunksizes, sleepamnts=[0.0])
23+
write(io, rand(UInt8, 3000))
24+
close(io)
25+
26+
buff = Array{UInt8}(undef, 10)
27+
while !eof(cio)
28+
r = readbytes!(cio, buff, 10)
29+
# In normal operation, the chunk size must be one of
30+
# the given chunksizes, but at the end of the stream
31+
# it is allowed to be less.
32+
if !eof(cio)
33+
@test r ∈ chunksizes
34+
else
35+
@test r <= maximum(chunksizes)
36+
end
37+
end
38+
end
39+
end
40+
341
@testset "empty tarball" begin
442
dir = mktempdir()
543
tarball = Tar.create(dir)
@@ -430,6 +468,32 @@ end
430468
end
431469
end
432470

471+
@testset "inconvenient stream buffering" begin
472+
# We will try feeding in an adversarial length that used to cause an assertion error
473+
open(tarball, read=true) do io
474+
# This used to cause an assertion error because we know the padded space beyond the
475+
# end of the test file content will be larger than 17 bytes, causing the `while`
476+
# loop to exit early, failing the assertion.
477+
@test hash == Tar.tree_hash(ChaosBufferStream(io; chunksizes=[17]); skip_empty=true)
478+
end
479+
480+
# This also affected read_data()
481+
mktempdir() do dir
482+
open(tarball, read=true) do io
483+
Tar.extract(ChaosBufferStream(io; chunksizes=[17]), dir)
484+
check_tree_hash(hash, dir)
485+
end
486+
end
487+
488+
# We also perform a fuzzing test to convince ourselves there are no other errors
489+
# of this type within `Tar.tree_hash()`.
490+
for idx in 1:100
491+
open(tarball, read=true) do io
492+
@test hash == Tar.tree_hash(ChaosBufferStream(io), skip_empty=true)
493+
end
494+
end
495+
end
496+
433497
@testset "with predicate" begin
434498
# generate a version of dir with .skip entries
435499
dir = make_test_dir(true)

test/setup.jl

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using Test
22
using Random
33
using ArgTools
4+
using SimpleBufferStream
45

56
if !Sys.iswindows()
67
using Tar_jll
@@ -104,7 +105,13 @@ function make_test_dir(gen_skip::Bool=false)
104105
touch(joinpath(dir, "file.skip"))
105106
end
106107
mkdir(joinpath(dir, "dir"))
107-
touch(joinpath(dir, "dir", "file"))
108+
109+
# Create a file that actually has content
110+
fpath = joinpath(dir, "dir", "file")
111+
touch(fpath)
112+
open(fpath, write=true) do io
113+
write(io, rand(UInt8, 1000))
114+
end
108115
mkdir(joinpath(dir, "empty"))
109116
if gen_skip
110117
touch(joinpath(dir, "dir", "file.skip"))
@@ -147,3 +154,33 @@ end
147154
function tar_write_dir(io::IO, path::String, mode::Integer=0o755)
148155
Tar.write_header(io, Tar.Header(path, :directory, mode, 0, ""))
149156
end
157+
158+
"""
159+
ChaosBufferStream(input::IO; chunksizes, sleepamnts)
160+
161+
Acts as a highly inconvenient BufferStream, allowing tests to purposefully break
162+
up their data flow reads/writes into chunks of random size (taken via `rand(chunksizes)`)
163+
and with a pause of `rand(sleepamnts)` in between each chunk.
164+
165+
Usage example:
166+
167+
t_hash = Tar.tree_hash(ChaosBufferStream(io))
168+
169+
Or slightly more complicated:
170+
171+
http_io = BufferStream()
172+
@async HTTP.get(url; response_stream=http_io)
173+
t_hash = Tar.tree_hash(ChaosBufferStream(http_io; chunksizes=4000:8000, sleepamnts=[1e-3, 2e-3]))
174+
"""
175+
function ChaosBufferStream(input::IO; chunksizes = 1024:2048, sleepamnts = 1e-3:1e-4:5e-3)
176+
output = BufferStream()
177+
@async begin
178+
while !eof(input)
179+
sleep(rand(sleepamnts))
180+
chunk = read(input, rand(chunksizes))
181+
write(output, chunk)
182+
end
183+
close(output)
184+
end
185+
return output
186+
end

0 commit comments

Comments
 (0)