@@ -43,8 +43,9 @@ function extract_tarball(
4343 root:: String ;
4444 buf:: Vector{UInt8} = Vector {UInt8} (undef, DEFAULT_BUFFER_SIZE),
4545 skeleton:: IO = devnull ,
46+ copy_symlinks:: Bool = false ,
4647)
47- read_tarball (predicate, tar; buf= buf, skeleton= skeleton) do hdr, parts
48+ paths = read_tarball (predicate, tar; buf= buf, skeleton= skeleton) do hdr, parts
4849 # get the file system version of the path
4950 sys_path = reduce (joinpath, init= root, parts)
5051 # delete anything that's there already
@@ -60,7 +61,7 @@ function extract_tarball(
6061 if hdr. type == :directory
6162 mkdir (sys_path)
6263 elseif hdr. type == :symlink
63- symlink (hdr. link, sys_path)
64+ copy_symlinks || symlink (hdr. link, sys_path)
6465 elseif hdr. type == :file
6566 read_data (tar, sys_path, size= hdr. size, buf= buf)
6667 # set executable bit if necessary
@@ -74,6 +75,89 @@ function extract_tarball(
7475 error (" unsupported tarball entry type: $(hdr. type) " )
7576 end
7677 end
78+ copy_symlinks || return
79+
80+ # resolve the internal targets of symlinks
81+ for (path, what) in paths
82+ what isa AbstractString || continue
83+ target = link_target (paths, path, what)
84+ paths[path] = something (target, :symlink )
85+ end
86+
87+ # follow chains of symlinks
88+ follow (seen:: Vector , what:: Symbol ) =
89+ what == :symlink ? what : seen[end ]
90+ follow (seen:: Vector , what:: String ) =
91+ what in seen ? :symlink : follow (push! (seen, what), paths[what])
92+ for (path, what) in paths
93+ what isa AbstractString || continue
94+ paths[path] = follow ([path], what)
95+ end
96+
97+ # copies that need to be made
98+ copies = Pair{String,String}[]
99+ for (path, what) in paths
100+ what isa AbstractString || continue
101+ push! (copies, path => what)
102+ end
103+ sort! (copies, by= last)
104+
105+ while ! isempty (copies)
106+ i = 1
107+ while i ≤ length (copies)
108+ path, what = copies[i]
109+ # check if source is complete yet
110+ if any (startswith (p, " $what /" ) for (p, w) in copies)
111+ # `what` is an incomplete directory
112+ # need to wait for source to be complete
113+ i += 1
114+ else
115+ # source complete, can copy now
116+ deleteat! (copies, i)
117+ src = reduce (joinpath, init= root, split (what, ' /' ))
118+ dst = reduce (joinpath, init= root, split (path, ' /' ))
119+ cp (src, dst)
120+ end
121+ end
122+ end
123+ end
124+
"""
    link_target(paths, path, link[, resolving]) -> Union{String,Nothing}

Resolve the symlink stored at tarball path `path`, whose link text is `link`,
to the tarball-internal path it points at. Return `nothing` when the link
cannot be used as a copy source.

`paths` maps each tarball path to either a `Symbol` (the entry type, e.g.
`:directory`) or a `String` (the link text of a symlink at that path).

A link is rejected (`nothing` is returned) when:
- it is absolute (starts with `/`);
- it climbs above the tarball root via `..`;
- any named component along the way is not a key of `paths`;
- an intermediate symlink is itself unresolvable or part of a symlink cycle;
- it resolves to the root itself (no path components left);
- it ends in `/` or `.` but the target is not a `:directory`;
- the target is `path` itself or one of its ancestor directories.

`resolving` is internal bookkeeping for the recursion: the symlink prefixes
currently being expanded. Callers should not pass it.
"""
function link_target(
    paths::Dict{String,Union{String,Symbol}},
    path::AbstractString,
    link::AbstractString,
    resolving::Vector{String} = String[],
)
    # absolute links point outside the tarball; `startswith` (unlike `first`)
    # is also safe on an empty link string
    startswith(link, "/") && return nothing
    path_parts = split(path, r"/+")
    link_parts = split(link, r"/+")
    # links are relative to the directory containing `path`
    pop!(path_parts)
    part = nothing # remember the last part
    while !isempty(link_parts)
        part = popfirst!(link_parts)
        part in ("", ".") && continue
        if part == ".."
            # refuse to climb above the tarball root
            isempty(path_parts) && return nothing
            pop!(path_parts)
        else
            push!(path_parts, part)
            prefix = join(path_parts, '/')
            haskey(paths, prefix) || return nothing
            # the final component is returned as-is, not expanded here
            isempty(link_parts) && break
            what = paths[prefix]
            if what isa AbstractString
                # intermediate component is itself a symlink: expand it,
                # but never re-enter a prefix that is already being expanded
                # (that would recurse forever on circular links)
                prefix in resolving && return nothing
                push!(resolving, prefix)
                resolved = link_target(paths, prefix, what, resolving)
                pop!(resolving)
                # a broken intermediate link makes the whole link invalid
                resolved === nothing && return nothing
                path_parts = split(resolved, '/')
            end
        end
    end
    isempty(path_parts) && return nothing
    target = join(path_parts, '/')
    # if link ends in `/` or `.` target must be a directory
    part in ("", ".") && get(paths, target, nothing) !== :directory && return nothing
    # can't copy a circular link to a prefix of itself
    (path == target || startswith(path, "$target/")) && return nothing
    return target
end
78162
79163function git_tree_hash (
@@ -210,8 +294,9 @@ function read_tarball(
210294 skeleton:: IO = devnull ,
211295)
212296 write_skeleton_header (skeleton, buf= buf)
297+ # symbols for path types except symlinks store the link
298+ paths = Dict {String,Union{Symbol,String}} ()
213299 globals = Dict {String,String} ()
214- links = Set {String} ()
215300 while ! eof (tar)
216301 hdr = read_header (tar, globals, buf= buf, tee= skeleton)
217302 hdr === nothing && break
@@ -226,18 +311,15 @@ function read_tarball(
226311 for part in split (hdr. path, ' /' )
227312 (isempty (part) || part == " ." ) && continue
228313 # check_header doesn't allow ".." in path
229- path in links && error ("""
314+ get (paths, path, nothing ) isa String && error ("""
230315 Refusing to extract path with symlink prefix, possible attack
316+ * path to extract: $(repr (hdr. path))
231317 * symlink prefix: $(repr (path))
232- * extracted path: $(repr (hdr. path))
233318 """ )
319+ isempty (path) || (paths[path] = :directory )
234320 path = isempty (path) ? part : " $path /$part "
235321 end
236- if hdr. type == :symlink
237- push! (links, path)
238- else
239- delete! (links, path)
240- end
322+ paths[path] = hdr. type == :symlink ? hdr. link : hdr. type
241323 before = applicable (position, tar) ? position (tar) : 0
242324 callback (hdr, split (path, ' /' , keepempty= false ))
243325 applicable (position, tar) || continue
@@ -246,6 +328,7 @@ function read_tarball(
246328 advanced == expected ||
247329 error (" callback read $advanced bytes instead of $expected " )
248330 end
331+ return paths
249332end
250333
251334function read_header (
0 commit comments