From 5cd8e51285b4b6086cc4ff6a05be39a52bce979b Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Fri, 8 Aug 2025 13:15:39 -0700 Subject: [PATCH 01/27] Add prototype --- .../lib/aws-sdk-s3/customizations.rb | 5 +- .../lib/aws-sdk-s3/directory_uploader.rb | 102 ++++++++++++++++++ .../lib/aws-sdk-s3/transfer_manager.rb | 61 +++++++++++ 3 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb create mode 100644 gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb index e21e4bc6bf9..b991f74e764 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb @@ -24,8 +24,11 @@ module S3 autoload :ExpressCredentialsProvider, 'aws-sdk-s3/express_credentials_provider' # s3 access grants auth - autoload :AccessGrantsCredentials, 'aws-sdk-s3/access_grants_credentials' autoload :AccessGrantsCredentialsProvider, 'aws-sdk-s3/access_grants_credentials_provider' + + # testing transfer manager + autoload :DirectoryUploader, 'aws-sdk-s3/directory_uploader' + autoload :TransferManager, 'aws-sdk-s3/transfer_manager' end end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb new file mode 100644 index 00000000000..2b6e95937a2 --- /dev/null +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require 'find' +require 'set' +require 'thread' + +module Aws + module S3 + # @api private + class DirectoryUploader + def initialize(options = {}) + @client = options[:client] || Client.new + @thread_count = options[:thread_count] || 10 + end + + # @return [Client] + attr_reader :client + + def upload(source, options = {}) + raise ArgumentError, 'Invalid directory' unless Dir.exist?(source) + + upload_opts = options.dup + @source = source + @recursive = upload_opts.delete(:recursive) || false + @follow_symlinks = upload_opts.delete(:follow_symlinks) || false + @s3_prefix = upload_opts.delete(:s3_prefix) || nil + @s3_delimiter = upload_opts.delete(:s3_delimiter) || '/' + @filter_callback = upload_opts.delete(:filter_callback) || nil + + uploader = FileUploader.new(multipart_threshold: upload_opts.delete(:multipart_threshold), client: @client) + + queue = SizedQueue.new(5) # TODO: random number + @disable_queue = false + _producer = Thread.new do + if @recursive + stream_recursive_files(queue) + else + stream_direct_files(queue) + end + @thread_count.times { queue << :done } + end + + threads = [] + @thread_count.times do + thread = Thread.new do + while (file = queue.shift) != :done + path = File.join(@source, file) + # TODO: key to consider s3_prefix and custom delimiter + uploader.upload(path, upload_opts.merge(key: file)) + end + nil + rescue StandardError => e # TODO: handle failure policies + @disable_queue = true + queue.clear + raise e + end + threads << thread + end + threads.map(&:value).compact + end + + private + + def stream_recursive_files(queue) + visited = Set.new + # TODO: add filter callback + Find.find(@source) do |p| + break if @disable_queue + + if !@follow_symlinks && File.symlink?(p) + Find.prune + next + end + + absolute_path = File.realpath(p) + if visited.include?(absolute_path) + Find.prune + next + end + + visited << absolute_path + + # TODO: if non-default s3_delimiter is used, validate here and fail + queue << p.sub(%r{^#{Regexp.escape(@source)}/}, 
'') if File.file?(p) + end + end + + def stream_direct_files(queue) + # TODO: add filter callback4 + Dir.each_child(@source) do |entry| + break if @disable_queue + + path = File.join(@source, entry) + next if !@follow_symlinks && File.symlink?(path) + + # TODO: if non-default s3_delimiter is used, validate here and fail + queue << entry if File.file?(path) + end + end + end + end +end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb new file mode 100644 index 00000000000..7dc6669d9f3 --- /dev/null +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module Aws + module S3 + # A high-level S3 transfer utility that provides enhanced upload and download + # capabilities with automatic multipart handling, progress tracking, and + # handling of large files. The following features are supported: + # + # * upload a S3 object with multipart upload + # * download a S3 object with multipart download + # * track transfer progress by using progress listener + class TransferManager + def initialize(options = {}) + @client = options[:client] || Client.new + end + + attr_reader :client + + def upload_file(source, options = {}) + uploading_options = options.dup + uploader = FileUploader.new(multipart_threshold: uploading_options.delete(:multipart_threshold), client: @client) + # TODO: wrap with user-agent metric tracking + response = uploader.upload(source, uploading_options) + yield response if block_given? + true + end + + def upload_stream(options = {}, &block) + uploading_options = options.dup + uploader = MultipartStreamUploader.new( + client: @client, + thread_count: uploading_options.delete(:thread_count), + tempfile: uploading_options.delete(:tempfile), + part_size: uploading_options.delete(:part_size) + ) + # TODO: wrap with user-agent metric tracking + uploader.upload(uploading_options, &block) + true + end + + def download_file(destination, options = {}) + downloader = FileDownloader.new(client: @client) + # TODO: wrap with user-agent metric tracking + downloader.download(destination, options) + true + end + + def upload_directory(source, options = {}) + upload_directory_opts = options.dup + directory_uploader = DirectoryUploader.new( + client: @client, + thread_count: upload_directory_opts.delete(:thread_count) + ) + directory_uploader.upload(source, upload_directory_opts) + true # TODO: need to change depending on failure policy set + end + + def download_directory(source, options = {}); end + end + end +end From fa34176e13399556c380b4df6b6e1676fe7a1737 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Mon, 11 Aug 2025 08:18:22 -0700 Subject: [PATCH 02/27] Add transfer manager interface --- .../lib/aws-sdk-s3/customizations.rb | 2 +- .../lib/aws-sdk-s3/transfer_manager.rb | 52 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb index e21e4bc6bf9..c0dba64b79c 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations.rb @@ -18,13 +18,13 @@ module S3 autoload :ObjectMultipartCopier, 'aws-sdk-s3/object_multipart_copier' autoload :PresignedPost, 'aws-sdk-s3/presigned_post' autoload :Presigner, 'aws-sdk-s3/presigner' + autoload :TransferManager, 'aws-sdk-s3/transfer_manager' # s3 express session auth autoload :ExpressCredentials, 
'aws-sdk-s3/express_credentials' autoload :ExpressCredentialsProvider, 'aws-sdk-s3/express_credentials_provider' # s3 access grants auth - autoload :AccessGrantsCredentials, 'aws-sdk-s3/access_grants_credentials' autoload :AccessGrantsCredentialsProvider, 'aws-sdk-s3/access_grants_credentials_provider' end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb new file mode 100644 index 00000000000..e16106a61b7 --- /dev/null +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Aws + module S3 + # A high-level S3 transfer utility that provides enhanced upload and download + # capabilities with automatic multipart handling, progress tracking, and + # handling of large files. The following features are supported: + # + # * upload a S3 object with multipart upload + # * download a S3 object with multipart download + # * track transfer progress by using progress listener + class TransferManager + def initialize(options = {}) + @client = options.delete(:client) || Client.new + end + + attr_reader :client + + def upload_file(source, options = {}) + uploading_options = options.dup + uploader = FileUploader.new( + multipart_threshold: uploading_options.delete(:multipart_threshold), + client: @client + ) + # TODO: wrap with user-agent metric tracking + response = uploader.upload(source, uploading_options) + yield response if block_given? + true + end + + def upload_stream(options = {}, &block) + uploading_options = options.dup + uploader = MultipartStreamUploader.new( + client: @client, + thread_count: uploading_options.delete(:thread_count), + tempfile: uploading_options.delete(:tempfile), + part_size: uploading_options.delete(:part_size) + ) + # TODO: wrap with user-agent metric tracking + uploader.upload(uploading_options, &block) + true + end + + def download_file(destination, options = {}) + downloader = FileDownloader.new(client: @client) + # TODO: wrap with user-agent metric tracking + downloader.download(destination, options) + true + end + end + end +end From 1a065f716fd1fbb094d5e0c9c568260219710c69 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Mon, 11 Aug 2025 08:54:47 -0700 Subject: [PATCH 03/27] Refactor MultipartStreamUploader --- .../aws-sdk-s3/multipart_stream_uploader.rb | 135 ++++++++---------- 1 file changed, 61 insertions(+), 74 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb index dccd224a22f..4120d733613 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb @@ -12,30 +12,21 @@ class MultipartStreamUploader # api private PART_SIZE = 5 * 1024 * 1024 # 5MB - # api private - THREAD_COUNT = 10 - - # api private - TEMPFILE_PREIX = 'aws-sdk-s3-upload_stream'.freeze - # @api private - CREATE_OPTIONS = - Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names) + CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names) # @api private - UPLOAD_PART_OPTIONS = - Set.new(Client.api.operation(:upload_part).input.shape.member_names) + UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names) # @api private - COMPLETE_UPLOAD_OPTIONS = - Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names) + COMPLETE_UPLOAD_OPTIONS = 
Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names) # @option options [Client] :client def initialize(options = {}) @client = options[:client] || Client.new @tempfile = options[:tempfile] @part_size = options[:part_size] || PART_SIZE - @thread_count = options[:thread_count] || THREAD_COUNT + @thread_count = options[:thread_count] || 10 end # @return [Client] @@ -43,7 +34,7 @@ def initialize(options = {}) # @option options [required,String] :bucket # @option options [required,String] :key - # @option options [Integer] :thread_count (THREAD_COUNT) + # @option options [Integer] :thread_count (10) # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse def upload(options = {}, &block) Aws::Plugins::UserAgent.metric('S3_TRANSFER') do @@ -61,10 +52,7 @@ def initiate_upload(options) def complete_upload(upload_id, parts, options) @client.complete_multipart_upload( - **complete_opts(options).merge( - upload_id: upload_id, - multipart_upload: { parts: parts } - ) + **complete_opts(options).merge(upload_id: upload_id, multipart_upload: { parts: parts }) ) end @@ -76,7 +64,8 @@ def upload_parts(upload_id, options, &block) threads = upload_in_threads( read_pipe, completed, upload_part_opts(options).merge(upload_id: upload_id), - thread_errors) + thread_errors + ) begin block.call(write_pipe) ensure @@ -85,7 +74,7 @@ def upload_parts(upload_id, options, &block) end threads.map(&:value).compact end - rescue => e + rescue StandardError => e thread_errors + [e] end @@ -97,50 +86,44 @@ def upload_parts(upload_id, options, &block) end def abort_upload(upload_id, options, errors) - @client.abort_multipart_upload( - bucket: options[:bucket], - key: options[:key], - upload_id: upload_id - ) + @client.abort_multipart_upload(bucket: options[:bucket], key: options[:key], upload_id: upload_id) msg = "multipart upload failed: #{errors.map(&:message).join('; ')}" raise MultipartUploadError.new(msg, errors) - rescue MultipartUploadError => error - raise error - rescue => error - msg = "failed to abort multipart upload: #{error.message}. "\ + rescue MultipartUploadError => e + raise e + rescue StandardError => e + msg = "failed to abort multipart upload: #{e.message}. "\ "Multipart upload failed: #{errors.map(&:message).join('; ')}" - raise MultipartUploadError.new(msg, errors + [error]) + raise MultipartUploadError.new(msg, errors + [e]) end def create_opts(options) - CREATE_OPTIONS.inject({}) do |hash, key| + CREATE_OPTIONS.each_with_object({}) do |key, hash| hash[key] = options[key] if options.key?(key) - hash end end def upload_part_opts(options) - UPLOAD_PART_OPTIONS.inject({}) do |hash, key| + UPLOAD_PART_OPTIONS.each_with_object({}) do |key, hash| hash[key] = options[key] if options.key?(key) - hash end end def complete_opts(options) - COMPLETE_UPLOAD_OPTIONS.inject({}) do |hash, key| + COMPLETE_UPLOAD_OPTIONS.each_with_object({}) do |key, hash| hash[key] = options[key] if options.key?(key) - hash end end def read_to_part_body(read_pipe) return if read_pipe.closed? - temp_io = @tempfile ? Tempfile.new(TEMPFILE_PREIX) : StringIO.new(String.new) + + temp_io = @tempfile ? Tempfile.new('aws-sdk-s3-upload_stream') : StringIO.new(String.new) temp_io.binmode bytes_copied = IO.copy_stream(read_pipe, temp_io, @part_size) temp_io.rewind - if bytes_copied == 0 - if Tempfile === temp_io + if bytes_copied.zero? 
+ if temp_io.is_a?(Tempfile) temp_io.close temp_io.unlink end @@ -155,48 +138,52 @@ def upload_in_threads(read_pipe, completed, options, thread_errors) part_number = 0 options.fetch(:thread_count, @thread_count).times.map do thread = Thread.new do - begin - loop do - body, thread_part_number = mutex.synchronize do - [read_to_part_body(read_pipe), part_number += 1] - end - break unless (body || thread_part_number == 1) - begin - part = options.merge( - body: body, - part_number: thread_part_number, - ) - resp = @client.upload_part(part) - completed_part = {etag: resp.etag, part_number: part[:part_number]} - - # get the requested checksum from the response - if part[:checksum_algorithm] - k = "checksum_#{part[:checksum_algorithm].downcase}".to_sym - completed_part[k] = resp[k] - end - completed.push(completed_part) - ensure - if Tempfile === body - body.close - body.unlink - elsif StringIO === body - body.string.clear - end - end + loop do + body, thread_part_number = mutex.synchronize do + [read_to_part_body(read_pipe), part_number += 1] end - nil - rescue => error - # keep other threads from uploading other parts - mutex.synchronize do - thread_errors.push(error) - read_pipe.close_read unless read_pipe.closed? + break unless body || thread_part_number == 1 + + begin + part = options.merge(body: body, part_number: thread_part_number) + resp = @client.upload_part(part) + completed_part = create_completed_part(resp, part) + completed.push(completed_part) + ensure + clear_body(body) end - error end + nil + rescue StandardError => e + # keep other threads from uploading other parts + mutex.synchronize do + thread_errors.push(e) + read_pipe.close_read unless read_pipe.closed? + end + e end thread end end + + def create_completed_part(resp, part) + completed_part = { etag: resp.etag, part_number: part[:part_number] } + return completed_part unless part[:checksum_algorithm] + + # get the requested checksum from the response + k = "checksum_#{part[:checksum_algorithm].downcase}".to_sym + completed_part[k] = resp[k] + completed_part + end + + def clear_body(body) + if body.is_a?(Tempfile) + body.close + body.unlink + elsif body.is_a?(StringIO) + body.string.clear + end + end end end end From 60ce6760f5a103389ee115a196ccc64cb5aa25ec Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Mon, 11 Aug 2025 09:21:38 -0700 Subject: [PATCH 04/27] Add abort upload when complete upload fails in MultipartStreamUploader --- .../lib/aws-sdk-s3/multipart_stream_uploader.rb | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb index 4120d733613..1be4ef5a764 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb @@ -54,6 +54,8 @@ def complete_upload(upload_id, parts, options) @client.complete_multipart_upload( **complete_opts(options).merge(upload_id: upload_id, multipart_upload: { parts: parts }) ) + rescue StandardError => e + abort_upload(upload_id, options, [e]) end def upload_parts(upload_id, options, &block) @@ -62,7 +64,8 @@ def upload_parts(upload_id, options, &block) errors = begin IO.pipe do |read_pipe, write_pipe| threads = upload_in_threads( - read_pipe, completed, + read_pipe, + completed, upload_part_opts(options).merge(upload_id: upload_id), thread_errors ) @@ -77,12 +80,9 @@ def upload_parts(upload_id, options, &block) rescue StandardError => e thread_errors + [e] end + 
return ordered_parts(completed) if errors.empty?
 
-      if errors.empty?
-        Array.new(completed.size) { completed.pop }.sort_by { |part| part[:part_number] }
-      else
-        abort_upload(upload_id, options, errors)
-      end
+      abort_upload(upload_id, options, errors)
     end
 
     def abort_upload(upload_id, options, errors)
@@ -176,6 +176,10 @@ def create_completed_part(resp, part)
         completed_part
       end
 
+      def ordered_parts(parts)
+        parts.size.times.map { parts.pop }.sort_by { |part| part[:part_number] }
+      end
+
       def clear_body(body)
         if body.is_a?(Tempfile)
           body.close

From 5cef0784a211d96f4bfd7775cc2da2e068b5e921 Mon Sep 17 00:00:00 2001
From: Juli Tera
Date: Mon, 11 Aug 2025 10:59:24 -0700
Subject: [PATCH 05/27] Add TM docs

---
 .../lib/aws-sdk-s3/transfer_manager.rb        | 206 +++++++++++++++++-
 1 file changed, 200 insertions(+), 6 deletions(-)

diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb
index e16106a61b7..af531b21188 100644
--- a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb
+++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb
@@ -10,25 +10,149 @@ module S3
     # * download a S3 object with multipart download
     # * track transfer progress by using progress listener
     class TransferManager
+      # @param [Hash] options
+      # @option options [S3::Client] :client (S3::Client.new)
+      #   The S3 client to use for {TransferManager} operations. If not provided, a new default client
+      #   will be created automatically.
       def initialize(options = {})
         @client = options.delete(:client) || Client.new
       end
 
+      # @return [S3::Client]
       attr_reader :client
 
-      def upload_file(source, options = {})
+      # Uploads a file from disk to S3.
+      #
+      #     # small files are uploaded with the PutObject API
+      #     tm = TransferManager.new
+      #     tm.upload_file('/path/to/small_file', 'example-bucket', 'example-key')
+      #
+      # Files larger than or equal to `:multipart_threshold` are uploaded using multipart upload APIs.
+      #
+      #     # large files are automatically split into parts and the parts are uploaded in parallel
+      #     tm.upload_file('/path/to/large_file', 'example-bucket', 'example-key')
+      #
+      # The response of the S3 upload API is yielded if a block is given.
+      #
+      #     # API response will have etag value of the file
+      #     tm.upload_file('/path/to/file', 'example-bucket', 'example-key') do |response|
+      #       etag = response.etag
+      #     end
+      #
+      # You can provide a callback to monitor progress of the upload:
+      #
+      #     # bytes and totals are each an array with 1 entry per part
+      #     progress = proc do |bytes, totals|
+      #       bytes.map.with_index do |b, i|
+      #         puts "Part #{i+1}: #{b} / #{totals[i]}".join(' ') + "Total: #{100.0 * bytes.sum / totals.sum }%"
+      #       end
+      #     end
+      #     tm.upload_file('/path/to/file', 'example-bucket', 'example-key', progress_callback: progress)
+      #
+      # @param [String, Pathname, File, Tempfile] source
+      #   A file on the local file system that will be uploaded. This can either be a `String` or `Pathname` to the
+      #   file, an open `File` object, or an open `Tempfile` object. If you pass an open `File` or `Tempfile` object,
+      #   then you are responsible for closing it after the upload completes. When using an open Tempfile, rewind it
+      #   before uploading or else the object will be empty.
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to upload to.
+      #
+      # @param [String] key
+      #   The object key name for the uploaded file.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#put_object} when the file size is below the multipart threshold.
+      #   For files larger than the multipart threshold, options for {Client#create_multipart_upload},
+      #   {Client#complete_multipart_upload}, and {Client#upload_part} can be provided.
+      #
+      # @option options [Integer] :multipart_threshold (104857600)
+      #   Files larger than or equal to `:multipart_threshold` are uploaded using the S3 multipart upload APIs.
+      #   Default threshold is `100MB`.
+      #
+      # @option options [Integer] :thread_count (10)
+      #   The number of parallel multipart uploads. This option is not used if the file is smaller than
+      #   `:multipart_threshold`.
+      #
+      # @option options [Proc] :progress_callback (nil)
+      #   A Proc that will be called when each chunk of the upload is sent.
+      #   It will be invoked with `[bytes_read]` and `[total_sizes]`.
+      #
+      # @raise [MultipartUploadError] If a file is being uploaded in parts, and the upload can not be completed,
+      #   then the upload is aborted and this error is raised. The raised error has a `#errors` method that
+      #   returns the failures that caused the upload to be aborted.
+      #
+      # @return [Boolean] Returns `true` when the file is uploaded without any errors.
+      #
+      # @see Client#put_object
+      # @see Client#create_multipart_upload
+      # @see Client#complete_multipart_upload
+      # @see Client#upload_part
+      def upload_file(source, bucket, key, options = {})
         uploading_options = options.dup
         uploader = FileUploader.new(
           multipart_threshold: uploading_options.delete(:multipart_threshold),
           client: @client
         )
         # TODO: wrap with user-agent metric tracking
-        response = uploader.upload(source, uploading_options)
+        response = uploader.upload(source, uploading_options.merge(bucket: bucket, key: key))
         yield response if block_given?
         true
       end
 
-      def upload_stream(options = {}, &block)
+      # Uploads a stream in a streaming fashion to S3.
+      #
+      # Passed chunks automatically split into multipart upload parts and the parts are uploaded in parallel.
+      # This allows for streaming uploads that never touch the disk.
+      #
+      # **Note**: There are known issues in JRuby until jruby-9.1.15.0, so avoid using this with older JRuby versions.
+      #
+      # @example Streaming chunks of data
+      #   tm = TransferManager.new
+      #   tm.upload_stream('example-bucket', 'example-key') do |write_stream|
+      #     10.times { write_stream << 'foo' }
+      #   end
+      # @example Streaming chunks of data
+      #   tm.upload_stream('example-bucket', 'example-key') do |write_stream|
+      #     IO.copy_stream(IO.popen('ls'), write_stream)
+      #   end
+      # @example Streaming chunks of data
+      #   tm.upload_stream('example-bucket', 'example-key') do |write_stream|
+      #     IO.copy_stream(STDIN, write_stream)
+      #   end
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to upload to.
+      #
+      # @param [String] key
+      #   The object key name for the uploaded file.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#create_multipart_upload}, {Client#complete_multipart_upload}, and
+      #   {Client#upload_part} can be provided.
+      #
+      # @option options [Integer] :thread_count (10)
+      #   The number of parallel multipart uploads.
+      #
+      # @option options [Boolean] :tempfile (false)
+      #   Normally read data is stored in memory when building the parts in order to complete the underlying
+      #   multipart upload. By passing `:tempfile => true`, the data read will be temporarily stored on disk reducing
+      #   the memory footprint vastly.
+      #
+      # @option options [Integer] :part_size (5242880)
+      #   Define how big each part size but the last should be. Default `:part_size` is `5 * 1024 * 1024`.
+      #
+      # @raise [MultipartUploadError] If an object is being uploaded in parts, and the upload can not be completed,
+      #   then the upload is aborted and this error is raised. The raised error has a `#errors` method that returns
+      #   the failures that caused the upload to be aborted.
+      #
+      # @return [Boolean] Returns `true` when the object is uploaded without any errors.
+      #
+      # @see Client#create_multipart_upload
+      # @see Client#complete_multipart_upload
+      # @see Client#upload_part
+      def upload_stream(bucket, key, options = {}, &block)
         uploading_options = options.dup
         uploader = MultipartStreamUploader.new(
           client: @client,
           thread_count: uploading_options.delete(:thread_count),
           tempfile: uploading_options.delete(:tempfile),
           part_size: uploading_options.delete(:part_size)
         )
         # TODO: wrap with user-agent metric tracking
-        uploader.upload(uploading_options, &block)
+        uploader.upload(uploading_options.merge(bucket: bucket, key: key), &block)
         true
       end
 
-      def download_file(destination, options = {})
+      # Downloads a file in S3 to a path on disk.
+      #
+      #     # small files (< 5MB) are downloaded in a single API call
+      #     tm = TransferManager.new
+      #     tm.download_file('/path/to/file', 'bucket-name', 'key-name')
+      #
+      # Files larger than 5MB are downloaded using the multipart method:
+      #
+      #     # large files are split into parts and the parts are downloaded in parallel
+      #     tm.download_file('/path/to/large_file', 'bucket-name', 'key-name')
+      #
+      # You can provide a callback to monitor progress of the download:
+      #
+      #     # bytes and part_sizes are each an array with 1 entry per part
+      #     # part_sizes may not be known until the first bytes are retrieved
+      #     progress = proc do |bytes, part_sizes, file_size|
+      #       bytes.map.with_index do |b, i|
+      #         puts "Part #{i + 1}: #{b} / #{part_sizes[i]}".join(' ') + "Total: #{100.0 * bytes.sum / file_size}%"
+      #       end
+      #     end
+      #     tm.download_file('/path/to/file', 'bucket-name', 'key-name', progress_callback: progress)
+      #
+      # @param [String] destination
+      #   Where to download the file to.
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to upload to.
+      #
+      # @param [String] key
+      #   The object key name for the uploaded file.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#get_object} and {Client#head_object} may be provided.
+      #
+      # @option options [String] :mode ("auto") `"auto"`, `"single_request"` or `"get_range"`
+      #
+      #   * `"auto"` mode is enabled by default, which performs `multipart_download`
+      #   * `"single_request"` mode forces only 1 GET request to be made in the download
+      #   * `"get_range"` mode requires the `:chunk_size` parameter to be configured to customize each range size
+      #
+      # @option options [Integer] :chunk_size required in `"get_range"` mode.
+      #
+      # @option options [Integer] :thread_count (10) Customize threads used in the multipart download.
+      #
+      # @option options [String] :version_id The object version id used to retrieve the object.
+      #
+      # @see https://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectVersioning.html ObjectVersioning
+      #
+      # @option options [String] :checksum_mode ("ENABLED")
+      #   When `"ENABLED"` and the object has a stored checksum, it will be used to validate the download and will
+      #   raise an `Aws::Errors::ChecksumError` if checksum validation fails. You may provide an `on_checksum_validated`
+      #   callback if you need to verify that validation occurred and which algorithm was used.
+      #   To disable checksum validation, set `checksum_mode` to `"DISABLED"`.
+      #
+      # @option options [Callable] :on_checksum_validated
+      #   Called each time a request's checksum is validated with the checksum algorithm and the
+      #   response. For multipart downloads, this will be called for each part that is downloaded and validated.
+      #
+      # @option options [Proc] :progress_callback
+      #   A Proc that will be called when each chunk of the download is received. It will be invoked with
+      #   `bytes_read`, `part_sizes`, `file_size`. When the object is downloaded as parts (rather than by ranges),
+      #   the `part_sizes` will not be known ahead of time and will be `nil` in the callback until the first bytes
+      #   in the part are received.
+      #
+      # @raise [MultipartDownloadError] Raised when an object validation fails outside of service errors.
+      #
+      # @return [Boolean] Returns `true` when the file is downloaded without any errors.
+      #
+      # @see Client#get_object
+      # @see Client#head_object
+      def download_file(destination, bucket, key, options = {})
         downloader = FileDownloader.new(client: @client)
         # TODO: wrap with user-agent metric tracking
-        downloader.download(destination, options)
+        downloader.download(destination, options.merge(bucket: bucket, key: key))
         true
       end
     end
   end
 end

From 9fb8f89d3db549d737ad09f77937d96bda88d5d2 Mon Sep 17 00:00:00 2001
From: Juli Tera
Date: Tue, 12 Aug 2025 08:38:47 -0700
Subject: [PATCH 06/27] Update docs and syntax

---
 .../lib/aws-sdk-s3/transfer_manager.rb        | 175 +++++++++---------
 1 file changed, 87 insertions(+), 88 deletions(-)

diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb
index af531b21188..a51245d56c4 100644
--- a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb
+++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb
@@ -10,7 +10,6 @@ module S3
     # * download a S3 object with multipart download
     # * track transfer progress by using progress listener
     class TransferManager
-
       # @param [Hash] options
       # @option options [S3::Client] :client (S3::Client.new)
       #   The S3 client to use for {TransferManager} operations. If not provided, a new default client
@@ -22,21 +21,98 @@ def initialize(options = {})
       # @return [S3::Client]
       attr_reader :client
 
+      # Downloads a file in S3 to a path on disk.
+      #
+      #     # small files (< 5MB) are downloaded in a single API call
+      #     tm = TransferManager.new
+      #     tm.download_file('/path/to/file', bucket: 'bucket-name', key: 'key-name')
+      #
+      # Files larger than 5MB are downloaded using the multipart method:
+      #
+      #     # large files are split into parts and the parts are downloaded in parallel
+      #     tm.download_file('/path/to/large_file', bucket: 'bucket-name', key: 'key-name')
+      #
+      # You can provide a callback to monitor progress of the download:
+      #
+      #     # bytes and part_sizes are each an array with 1 entry per part
+      #     # part_sizes may not be known until the first bytes are retrieved
+      #     progress = proc do |bytes, part_sizes, file_size|
+      #       bytes.map.with_index do |b, i|
+      #         puts "Part #{i + 1}: #{b} / #{part_sizes[i]}".join(' ') + "Total: #{100.0 * bytes.sum / file_size}%"
+      #       end
+      #     end
+      #     tm.download_file('/path/to/file', bucket: 'bucket-name', key: 'key-name', progress_callback: progress)
+      #
+      # @param [String] destination
+      #   Where to download the file to.
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to upload to.
+      #
+      # @param [String] key
+      #   The object key name in S3 bucket.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#get_object} and {Client#head_object} may be provided.
+      #
+      # @option options [String] :mode ("auto") `"auto"`, `"single_request"` or `"get_range"`
+      #
+      #   * `"auto"` mode is enabled by default, which performs `multipart_download`
+      #   * `"single_request"` mode forces only 1 GET request to be made in the download
+      #   * `"get_range"` mode requires the `:chunk_size` parameter to be configured to customize each range size
+      #
+      # @option options [Integer] :chunk_size required in `"get_range"` mode.
+      #
+      # @option options [Integer] :thread_count (10) Customize threads used in the multipart download.
+      #
+      # @option options [String] :version_id The object version id used to retrieve the object.
+      #
+      # @see https://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectVersioning.html ObjectVersioning
+      #
+      # @option options [String] :checksum_mode ("ENABLED")
+      #   When `"ENABLED"` and the object has a stored checksum, it will be used to validate the download and will
+      #   raise an `Aws::Errors::ChecksumError` if checksum validation fails. You may provide an `on_checksum_validated`
+      #   callback if you need to verify that validation occurred and which algorithm was used.
+      #   To disable checksum validation, set `checksum_mode` to `"DISABLED"`.
+      #
+      # @option options [Callable] :on_checksum_validated
+      #   Called each time a request's checksum is validated with the checksum algorithm and the
+      #   response. For multipart downloads, this will be called for each part that is downloaded and validated.
+      #
+      # @option options [Proc] :progress_callback
+      #   A Proc that will be called when each chunk of the download is received. It will be invoked with
+      #   `bytes_read`, `part_sizes`, `file_size`. When the object is downloaded as parts (rather than by ranges),
+      #   the `part_sizes` will not be known ahead of time and will be `nil` in the callback until the first bytes
+      #   in the part are received.
+      #
+      # @raise [MultipartDownloadError] Raised when an object validation fails outside of service errors.
+      #
+      # @return [Boolean] Returns `true` when the file is downloaded without any errors.
+      #
+      # @see Client#get_object
+      # @see Client#head_object
+      def download_file(destination, bucket:, key:, **options)
+        downloader = FileDownloader.new(client: @client)
+        # TODO: wrap with user-agent metric tracking
+        downloader.download(destination, options.merge(bucket: bucket, key: key))
+        true
+      end
+
       # Uploads a file from disk to S3.
       #
       #     # small files are uploaded with the PutObject API
       #     tm = TransferManager.new
-      #     tm.upload_file('/path/to/small_file', 'example-bucket', 'example-key')
+      #     tm.upload_file('/path/to/small_file', bucket: 'bucket-name', key: 'key-name')
       #
       # Files larger than or equal to `:multipart_threshold` are uploaded using multipart upload APIs.
       #
       #     # large files are automatically split into parts and the parts are uploaded in parallel
-      #     tm.upload_file('/path/to/large_file', 'example-bucket', 'example-key')
+      #     tm.upload_file('/path/to/large_file', bucket: 'bucket-name', key: 'key-name')
       #
       # The response of the S3 upload API is yielded if a block is given.
# # # API response will have etag value of the file - # tm.upload_file('/path/to/file', 'example-bucket', 'example-key') do |response| + # tm.upload_file('/path/to/file', bucket: 'bucket-name', key: 'key-name') do |response| # etag = response.etag # end # @@ -45,10 +121,10 @@ def initialize(options = {}) # # bytes and totals are each an array with 1 entry per part # progress = proc do |bytes, totals| # bytes.map.with_index do |b, i| - # puts "Part #{i+1}: #{b} / #{totals[i]}".join(' ') + "Total: #{100.0 * bytes.sum / totals.sum }%" + # puts "Part #{i + 1}: #{b} / #{totals[i]} " + "Total: #{100.0 * bytes.sum / totals.sum}%" # end # end - # tm.upload_file('/path/to/file', 'example-bucket', 'example-key', progress_callback: progress) + # tm.upload_file('/path/to/file', bucket: 'bucket-name', key: 'key-name', progress_callback: progress) # # @param [String, Pathname, File, Tempfile] source # A file on the local file system that will be uploaded. This can either be a `String` or `Pathname` to the @@ -89,7 +165,7 @@ def initialize(options = {}) # @see Client#create_multipart_upload # @see Client#complete_multipart_upload # @see Client#upload_part - def upload_file(source, bucket, key, options = {}) + def upload_file(source, bucket:, key:, **options) uploading_options = options.dup uploader = FileUploader.new( multipart_threshold: uploading_options.delete(:multipart_threshold), @@ -110,15 +186,15 @@ def upload_file(source, bucket, key, options = {}) # # @example Streaming chunks of data # tm = TransferManager.new - # tm.upload_stream('example-bucket', 'example-key') do |write_stream| + # tm.upload_stream(bucket: 'example-bucket', key: 'example-key') do |write_stream| # 10.times { write_stream << 'foo' } # end # @example Streaming chunks of data - # tm.upload_stream('example-bucket', 'example-key') do |write_stream| + # tm.upload_stream(bucket: 'example-bucket', key: 'example-key') do |write_stream| # IO.copy_stream(IO.popen('ls'), write_stream) # end # @example Streaming chunks of data - # tm.upload_stream('example-bucket', 'example-key') do |write_stream| + # tm.upload_stream(bucket: 'example-bucket', key: 'example-key') do |write_stream| # IO.copy_stream(STDIN, write_stream) # end # @@ -152,7 +228,7 @@ def upload_file(source, bucket, key, options = {}) # @see Client#create_multipart_upload # @see Client#complete_multipart_upload # @see Client#upload_part - def upload_stream(bucket, key, options = {}, &block) + def upload_stream(bucket:, key:, **options, &block) uploading_options = options.dup uploader = MultipartStreamUploader.new( client: @client, @@ -164,83 +240,6 @@ def upload_stream(bucket, key, options = {}, &block) uploader.upload(uploading_options.merge(bucket: bucket, key: key), &block) true end - - # Downloads a file in S3 to a path on disk. 
-      #
-      #     # small files (< 5MB) are downloaded in a single API call
-      #     tm = TransferManager.new
-      #     tm.download_file('/path/to/file', 'bucket-name', 'key-name')
-      #
-      # Files larger than 5MB are downloaded using the multipart method:
-      #
-      #     # large files are split into parts and the parts are downloaded in parallel
-      #     tm.download_file('/path/to/large_file', 'bucket-name', 'key-name')
-      #
-      # You can provide a callback to monitor progress of the download:
-      #
-      #     # bytes and part_sizes are each an array with 1 entry per part
-      #     # part_sizes may not be known until the first bytes are retrieved
-      #     progress = proc do |bytes, part_sizes, file_size|
-      #       bytes.map.with_index do |b, i|
-      #         puts "Part #{i + 1}: #{b} / #{part_sizes[i]}".join(' ') + "Total: #{100.0 * bytes.sum / file_size}%"
-      #       end
-      #     end
-      #     tm.download_file('/path/to/file', 'bucket-name', 'key-name', progress_callback: progress)
-      #
-      # @param [String] destination
-      #   Where to download the file to.
-      #
-      # @param [String] bucket
-      #   The name of the S3 bucket to upload to.
-      #
-      # @param [String] key
-      #   The object key name for the uploaded file.
-      #
-      # @param [Hash] options
-      #   Additional options for {Client#get_object} and {Client#head_object} may be provided.
-      #
-      # @option options [String] :mode ("auto") `"auto"`, `"single_request"` or `"get_range"`
-      #
-      #   * `"auto"` mode is enabled by default, which performs `multipart_download`
-      #   * `"single_request"` mode forces only 1 GET request to be made in the download
-      #   * `"get_range"` mode requires the `:chunk_size` parameter to be configured to customize each range size
-      #
-      # @option options [Integer] :chunk_size required in `"get_range"` mode.
-      #
-      # @option options [Integer] :thread_count (10) Customize threads used in the multipart download.
-      #
-      # @option options [String] :version_id The object version id used to retrieve the object.
-      #
-      # @see https://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectVersioning.html ObjectVersioning
-      #
-      # @option options [String] :checksum_mode ("ENABLED")
-      #   When `"ENABLED"` and the object has a stored checksum, it will be used to validate the download and will
-      #   raise an `Aws::Errors::ChecksumError` if checksum validation fails. You may provide an `on_checksum_validated`
-      #   callback if you need to verify that validation occurred and which algorithm was used.
-      #   To disable checksum validation, set `checksum_mode` to `"DISABLED"`.
-      #
-      # @option options [Callable] :on_checksum_validated
-      #   Called each time a request's checksum is validated with the checksum algorithm and the
-      #   response. For multipart downloads, this will be called for each part that is downloaded and validated.
-      #
-      # @option options [Proc] :progress_callback
-      #   A Proc that will be called when each chunk of the download is received. It will be invoked with
-      #   `bytes_read`, `part_sizes`, `file_size`. When the object is downloaded as parts (rather than by ranges),
-      #   the `part_sizes` will not be known ahead of time and will be `nil` in the callback until the first bytes
-      #   in the part are received.
-      #
-      # @raise [MultipartDownloadError] Raised when an object validation fails outside of service errors.
-      #
-      # @return [Boolean] Returns `true` when the file is downloaded without any errors.
- # - # @see Client#get_object - # @see Client#head_object - def download_file(destination, bucket, key, options = {}) - downloader = FileDownloader.new(client: @client) - # TODO: wrap with user-agent metric tracking - downloader.download(destination, options.merge(bucket: bucket, key: key)) - true - end end end end From fe2f7f77ae48bcaf209179e8c1239d1d09ca9700 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Tue, 12 Aug 2025 08:39:13 -0700 Subject: [PATCH 07/27] Add temp changelog entry --- gems/aws-sdk-s3/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gems/aws-sdk-s3/CHANGELOG.md b/gems/aws-sdk-s3/CHANGELOG.md index ad3779f590a..a751b956a26 100644 --- a/gems/aws-sdk-s3/CHANGELOG.md +++ b/gems/aws-sdk-s3/CHANGELOG.md @@ -1,6 +1,10 @@ Unreleased Changes ------------------ +* Issue - When multipart stream uploader fails to complete multipart upload, it calls abort multipart upload. + +* Feature - Add Transfer Manager (more notes to do) + 1.196.1 (2025-08-05) ------------------ From 9b6b5f4f6dee18e1f00229ab831de0231bef7ec0 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Tue, 12 Aug 2025 08:40:21 -0700 Subject: [PATCH 08/27] Omit spacing --- gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb index a51245d56c4..40f52ea1084 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb @@ -220,10 +220,10 @@ def upload_file(source, bucket:, key:, **options) # Define how big each part size but the last should be. Default `:part_size` is `5 * 1024 * 1024`. # # @raise [MultipartUploadError] If an object is being uploaded in parts, and the upload can not be completed, - # then the upload is aborted and this error is raised. The raised error has a `#errors` method that returns + # then the upload is aborted and this error is raised. The raised error has a `#errors` method that returns # the failures that caused the upload to be aborted. # - # @return [Boolean] Returns `true` when the object is uploaded without any errors. + # @return [Boolean] Returns `true` when the object is uploaded without any errors. 
# # @see Client#create_multipart_upload # @see Client#complete_multipart_upload From 7b37545b8049e71b90d80dfcd12e0d9fadca41d0 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Tue, 12 Aug 2025 08:41:05 -0700 Subject: [PATCH 09/27] Add empty test file --- gems/aws-sdk-s3/spec/transfer_manager_spec.rb | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 gems/aws-sdk-s3/spec/transfer_manager_spec.rb diff --git a/gems/aws-sdk-s3/spec/transfer_manager_spec.rb b/gems/aws-sdk-s3/spec/transfer_manager_spec.rb new file mode 100644 index 00000000000..5d4d084f2a3 --- /dev/null +++ b/gems/aws-sdk-s3/spec/transfer_manager_spec.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +require_relative 'spec_helper' + +module Aws + module S3 + describe TransferManager do + describe '#initialize' do + it 'constructs a default s3 client when one is not given' do + end + end + + describe '#download_file' do + it 'downloads single file' do + # TODO + end + + it 'downloads larger files in parts' do + # TODO + end + + it 'downloads larger files in ranges' do + # TODO + end + end + + describe '#upload_file' do + # TODO + end + + describe '#upload_stream' do + # TODO + end + end + end +end + From 6901b45c78073095ef296ae9ca8e16f95f6069ef Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Tue, 12 Aug 2025 09:57:06 -0700 Subject: [PATCH 10/27] Add deprecation warnings --- .../lib/aws-sdk-s3/customizations/object.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb index 5f3b80a9b7a..4e7d4691723 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb @@ -8,6 +8,15 @@ class Object # Make the method redefinable alias_method :copy_from, :copy_from + # @api private + def self.deprecation_msg(method) + "#################### DEPRECATION WARNING ####################\n"\ + "Called deprecated method `#{method}` of #{self}.\n"\ + "Use method `#{method}` from Aws::S3::TransferManager instead.\n"\ + "#{self} support will be removed in next major version.\n"\ + '#############################################################' + end + # Copies another object to this object. Use `multipart_copy: true` # for large objects. This is required for objects that exceed 5GB. # @@ -398,6 +407,7 @@ def upload_stream(options = {}, &block) end true end + deprecated(:upload_stream, message: deprecation_msg(:upload_stream)) # Uploads a file from disk to the current object in S3. # @@ -465,6 +475,7 @@ def upload_file(source, options = {}) yield response if block_given? true end + deprecated(:upload_file, message: deprecation_msg(:upload_file)) # Downloads a file in S3 to a path on disk. # @@ -534,6 +545,7 @@ def download_file(destination, options = {}) end true end + deprecated(:download_file, message: deprecation_msg(:download_file)) class Collection < Aws::Resources::Collection alias_method :delete, :batch_delete! 
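A quick usage sketch for reviewers, contrasting the resource-model methods deprecated in PATCH 10 with the `TransferManager` equivalents introduced in PATCH 06. The bucket, key, and file paths are illustrative placeholders, not values from this series:

    require 'aws-sdk-s3'

    # Deprecated: resource-model helpers now emit the deprecation warning above.
    obj = Aws::S3::Object.new('example-bucket', 'example-key')
    obj.upload_file('/tmp/data.bin')

    # Preferred: TransferManager takes the source/destination positionally and
    # the bucket/key as keyword arguments (signatures as of PATCH 06).
    tm = Aws::S3::TransferManager.new(client: Aws::S3::Client.new)
    tm.upload_file('/tmp/data.bin', bucket: 'example-bucket', key: 'example-key')
    tm.download_file('/tmp/data.bin', bucket: 'example-bucket', key: 'example-key')
    tm.upload_stream(bucket: 'example-bucket', key: 'example-stream') do |write_stream|
      write_stream << 'streamed bytes'
    end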
From 689d987974dc26499e8c484a84c38fdd4257090c Mon Sep 17 00:00:00 2001
From: Juli Tera
Date: Tue, 12 Aug 2025 10:12:08 -0700
Subject: [PATCH 11/27] Update changelog

---
 gems/aws-sdk-s3/CHANGELOG.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gems/aws-sdk-s3/CHANGELOG.md b/gems/aws-sdk-s3/CHANGELOG.md
index a751b956a26..3f444d250e2 100644
--- a/gems/aws-sdk-s3/CHANGELOG.md
+++ b/gems/aws-sdk-s3/CHANGELOG.md
@@ -3,7 +3,9 @@ Unreleased Changes
 
 * Issue - When multipart stream uploader fails to complete multipart upload, it calls abort multipart upload.
 
-* Feature - Add Transfer Manager (more notes to do)
+* Issue - For `Aws::S3::Object` class, the following methods have been deprecated: `upload_file`, `download_file`, and `upload_stream`. Use Transfer Manager instead.
+
+* Feature - Add Transfer Manager, an S3 transfer utility that provides upload/download capabilities with automatic multipart handling, progress tracking, and handling of large files.
 
 1.196.1 (2025-08-05)
 ------------------

From f2e3a16126eb9d31c5b007e9ed14ca8cf480d44f Mon Sep 17 00:00:00 2001
From: Juli Tera
Date: Tue, 12 Aug 2025 10:13:48 -0700
Subject: [PATCH 12/27] Revised changelog entries wording

---
 gems/aws-sdk-s3/CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gems/aws-sdk-s3/CHANGELOG.md b/gems/aws-sdk-s3/CHANGELOG.md
index 3f444d250e2..8cb3e74f16d 100644
--- a/gems/aws-sdk-s3/CHANGELOG.md
+++ b/gems/aws-sdk-s3/CHANGELOG.md
@@ -3,9 +3,9 @@ Unreleased Changes
 
 * Issue - When multipart stream uploader fails to complete multipart upload, it calls abort multipart upload.
 
-* Issue - For `Aws::S3::Object` class, the following methods have been deprecated: `upload_file`, `download_file`, and `upload_stream`. Use Transfer Manager instead.
+* Issue - For `Aws::S3::Object` class, the following methods have been deprecated: `download_file`, `upload_file` and `upload_stream`. Use `Aws::S3::TransferManager` instead.
 
-* Feature - Add Transfer Manager, an S3 transfer utility that provides upload/download capabilities with automatic multipart handling, progress tracking, and handling of large files.
+* Feature - Add `Aws::S3::TransferManager`, an S3 transfer utility that provides upload/download capabilities with automatic multipart handling, progress tracking, and handling of large files.
1.196.1 (2025-08-05) ------------------ From 70fba157765e34bc93563ee9492a4f0ba5153a20 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Wed, 13 Aug 2025 12:13:19 -0700 Subject: [PATCH 13/27] Update consts --- gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb | 4 ++-- .../lib/aws-sdk-s3/multipart_stream_uploader.rb | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb index 4c18ace70ab..27844c286f5 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb @@ -12,7 +12,7 @@ class MultipartFileUploader MAX_PARTS = 10_000 - THREAD_COUNT = 10 + DEFAULT_THREAD_COUNT = 10 CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names) @@ -30,7 +30,7 @@ class MultipartFileUploader # @option options [Integer] :thread_count (THREAD_COUNT) def initialize(options = {}) @client = options[:client] || Client.new - @thread_count = options[:thread_count] || THREAD_COUNT + @thread_count = options[:thread_count] || DEFAULT_THREAD_COUNT end # @return [Client] diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb index 1be4ef5a764..6f45d8fa36c 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb @@ -10,7 +10,9 @@ module S3 # @api private class MultipartStreamUploader # api private - PART_SIZE = 5 * 1024 * 1024 # 5MB + DEFAULT_PART_SIZE = 5 * 1024 * 1024 # 5MB + + DEFAULT_THREAD_COUNT = 10 # @api private CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names) @@ -25,8 +27,8 @@ class MultipartStreamUploader def initialize(options = {}) @client = options[:client] || Client.new @tempfile = options[:tempfile] - @part_size = options[:part_size] || PART_SIZE - @thread_count = options[:thread_count] || 10 + @part_size = options[:part_size] || DEFAULT_PART_SIZE + @thread_count = options[:thread_count] || DEFAULT_THREAD_COUNT end # @return [Client] From c945aadb28d7928290ac94f5bd666fc8b23d2397 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Wed, 13 Aug 2025 12:22:07 -0700 Subject: [PATCH 14/27] Separate specs between utilities and resource-model --- gems/aws-sdk-s3/spec/file_uploader_spec.rb | 95 ++++ .../spec/multipart_file_uploader_spec.rb | 189 ++++++++ .../spec/multipart_stream_uploader_spec.rb | 285 ++++++++++++ .../spec/object/upload_file_spec.rb | 220 +-------- .../spec/object/upload_stream_spec.rb | 439 ++---------------- 5 files changed, 619 insertions(+), 609 deletions(-) create mode 100644 gems/aws-sdk-s3/spec/file_uploader_spec.rb create mode 100644 gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb create mode 100644 gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb diff --git a/gems/aws-sdk-s3/spec/file_uploader_spec.rb b/gems/aws-sdk-s3/spec/file_uploader_spec.rb new file mode 100644 index 00000000000..ce23750fe77 --- /dev/null +++ b/gems/aws-sdk-s3/spec/file_uploader_spec.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +require_relative 'spec_helper' +require 'tempfile' + +module Aws + module S3 + describe FileUploader do + let(:client) { S3::Client.new(stub_responses: true) } + let(:subject) { FileUploader.new(client: client) } + let(:params) { { bucket: 'bucket', key: 'key' } } + + describe '#initialize' do + it 'constructs a default s3 
client when not given' do
+          client = double('client')
+          expect(S3::Client).to receive(:new).and_return(client)
+
+          uploader = FileUploader.new
+          expect(uploader.client).to be(client)
+        end
+
+        it 'sets a default multipart threshold when not given' do
+          expect(subject.multipart_threshold).to be(FileUploader::ONE_HUNDRED_MEGABYTES)
+        end
+
+        it 'sets a custom multipart threshold' do
+          five_mb = 5 * 1024 * 1024
+          uploader = FileUploader.new(client: client, multipart_threshold: five_mb)
+          expect(uploader.multipart_threshold).to be(five_mb)
+        end
+      end
+
+      describe '#upload' do
+        let(:one_meg) { 1024 * 1024 }
+        let(:one_mb) { '.' * one_meg }
+        let(:one_meg_file) do
+          Tempfile.new('one-meg-file').tap do |f|
+            f.write(one_mb)
+            f.rewind
+          end
+        end
+
+        let(:ten_meg_file) do
+          Tempfile.new('ten-meg-file').tap do |f|
+            10.times { f.write(one_mb) }
+            f.rewind
+          end
+        end
+
+        let(:one_hundred_seventeen_meg_file) do
+          Tempfile.new('one-hundred-seventeen-meg-file').tap do |f|
+            117.times { f.write(one_mb) }
+            f.rewind
+          end
+        end
+
+        it 'uploads a small file using Client#put_object' do
+          expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: one_meg_file })
+          subject.upload(one_meg_file, params)
+        end
+
+        it 'delegates the large file to use multipart upload' do
+          expect_any_instance_of(MultipartFileUploader).to receive(:upload).with(one_hundred_seventeen_meg_file, params)
+          subject.upload(one_hundred_seventeen_meg_file, params)
+        end
+
+        it 'reports progress for a small file' do
+          expect(client).to receive(:put_object).with(
+            params.merge(body: ten_meg_file, on_chunk_sent: instance_of(Proc))
+          ) do |args|
+            args[:on_chunk_sent].call(ten_meg_file, ten_meg_file.size, ten_meg_file.size)
+          end
+          callback = proc do |bytes, totals|
+            expect(bytes).to eq([ten_meg_file.size])
+            expect(totals).to eq([ten_meg_file.size])
+          end
+          subject.upload(ten_meg_file, params.merge(progress_callback: callback))
+        end
+
+        it 'accepts paths to files to upload' do
+          file = double('file')
+          expect(File).to receive(:open).with(ten_meg_file.path, 'rb').and_yield(file)
+          expect(client).to receive(:put_object).with(params.merge(body: file))
+          subject.upload(ten_meg_file.path, params)
+        end
+
+        it 'does not fail when given :thread_count' do
+          expect(client).to receive(:put_object).with(params.merge(body: ten_meg_file))
+          subject.upload(ten_meg_file, params.merge(thread_count: 1))
+        end
+      end
+    end
+  end
+end
+
diff --git a/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb b/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb
new file mode 100644
index 00000000000..a966359f190
--- /dev/null
+++ b/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb
@@ -0,0 +1,189 @@
+# frozen_string_literal: true
+
+require_relative 'spec_helper'
+require 'tempfile'
+
+module Aws
+  module S3
+    describe MultipartFileUploader do
+      let(:client) { S3::Client.new(stub_responses: true) }
+      let(:subject) { MultipartFileUploader.new(client: client) }
+      let(:params) { { bucket: 'bucket', key: 'key' } }
+
+      describe '#initialize' do
+        it 'constructs a default s3 client when not given' do
+          client = double('client')
+          expect(S3::Client).to receive(:new).and_return(client)
+
+          uploader = MultipartFileUploader.new
+          expect(uploader.client).to be(client)
+        end
+
+        it 'sets a default thread count when not given' do
+          expect(subject.instance_variable_get(:@thread_count)).to be(MultipartFileUploader::DEFAULT_THREAD_COUNT)
+        end
+
+        it 'sets a custom thread count' do
+          subject = MultipartFileUploader.new(client: client, 
thread_count: 3) + expect(subject.instance_variable_get(:@thread_count)).to be(3) + end + end + + describe '#upload' do + let(:one_mb) { '.' * 1024 * 1024 } + let(:one_meg_file) do + Tempfile.new('one-meg-file').tap do |f| + f.write(one_mb) + f.rewind + end + end + + let(:ten_meg_file) do + Tempfile.new('ten-meg-file').tap do |f| + 10.times { f.write(one_mb) } + f.rewind + end + end + + let(:one_hundred_seventeen_meg_file) do + Tempfile.new('one-hundred-seventeen-meg-file').tap do |f| + 117.times { f.write(one_mb) } + f.rewind + end + end + + it 'uses multipart APIs for objects >= 100MB' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag', checksum_crc32: 'checksum') + expect(client).to receive(:complete_multipart_upload).with( + bucket: 'bucket', + key: 'key', + upload_id: 'id', + multipart_upload: { + parts: [ + { checksum_crc32: 'checksum', etag: 'etag', part_number: 1 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 2 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 3 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 4 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 5 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 6 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 7 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 8 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 9 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 10 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 11 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 12 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 13 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 14 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 15 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 16 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 17 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 18 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 19 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 20 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 21 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 22 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 23 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 24 } + ] + }, + mpu_object_size: one_hundred_seventeen_meg_file.size + ) + subject.upload(one_hundred_seventeen_meg_file, params.merge(content_type: 'text/plain')) + end + + it 'allows for full object checksums' do + expect(client).to receive(:create_multipart_upload) + .with(params.merge(checksum_algorithm: 'CRC32', checksum_type: 'FULL_OBJECT', content_type: 'text/plain')) + .and_call_original + expect(client).to receive(:upload_part) + .with(hash_not_including(checksum_crc32: anything)) + .exactly(24).times + .and_call_original + expect(client).to receive(:complete_multipart_upload) + .with(hash_including(checksum_type: 'FULL_OBJECT', checksum_crc32: 'checksum')) + .and_call_original + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag', checksum_crc32: 'part') + + subject.upload( + one_hundred_seventeen_meg_file, + params.merge(content_type: 'text/plain', checksum_crc32: 'checksum') + ) + end + + it 'reports progress for multipart uploads' do + allow(Thread).to receive(:new).and_yield.and_return(double(value: nil)) + 
client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:complete_multipart_upload) + expect(client).to receive(:upload_part).exactly(24).times do |args| + args[:on_chunk_sent].call(args[:body], args[:body].size, args[:body].size) + double(context: double(params: { checksum_algorithm: 'crc32' }), checksum_crc32: 'checksum', etag: 'etag') + end + callback = proc do |bytes, totals| + expect(bytes.size).to eq(24) + expect(totals.size).to eq(24) + end + + subject.upload( + one_hundred_seventeen_meg_file, + params.merge(content_type: 'text/plain', progress_callback: callback) + ) + end + + it 'raises when given a file smaller than 5MB' do + expect { subject.upload(one_meg_file, params) } + .to raise_error(ArgumentError, /unable to multipart upload files smaller than 5MB/) + end + + it 'automatically deletes failed multipart upload on error' do + allow_any_instance_of(FilePart).to receive(:read).and_return(nil) + client.stub_responses( + :upload_part, + [ + { etag: 'etag-1' }, + { etag: 'etag-2' }, + RuntimeError.new('part 3 failed'), + { etag: 'etag-4' } + ] + ) + + expect(client).to receive(:abort_multipart_upload).with(params.merge(upload_id: 'MultipartUploadId')) + expect do + subject.upload(one_hundred_seventeen_meg_file, params) + end.to raise_error(/multipart upload failed: part 3 failed/) + end + + it 'reports when it is unable to abort a failed multipart upload' do + allow(Thread).to receive(:new) do |_, &block| + double(value: block.call) + end + + client.stub_responses( + :upload_part, + [ + { etag: 'etag-1' }, + { etag: 'etag-2' }, + { etag: 'etag-3' }, + RuntimeError.new('part failed') + ] + ) + client.stub_responses(:abort_multipart_upload, [RuntimeError.new('network-error')]) + expect do + subject.upload(one_hundred_seventeen_meg_file, params) + end.to raise_error(/failed to abort multipart upload: network-error. 
Multipart upload failed: part failed/) + end + + it 'aborts multipart upload when upload fails to complete' do + client.stub_responses(:complete_multipart_upload, RuntimeError.new('network-error')) + expect(client).to receive(:abort_multipart_upload).with(params.merge(upload_id: 'MultipartUploadId')) + expect do + subject.upload(one_hundred_seventeen_meg_file, params) + end.to raise_error(Aws::S3::MultipartUploadError) + end + end + end + end +end + diff --git a/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb b/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb new file mode 100644 index 00000000000..f10a99420eb --- /dev/null +++ b/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb @@ -0,0 +1,285 @@ +# frozen_string_literal: true + +require_relative 'spec_helper' +require 'tempfile' + +module Aws + module S3 + describe MultipartStreamUploader do + let(:client) { S3::Client.new(stub_responses: true) } + let(:subject) { MultipartStreamUploader.new(client: client) } + let(:params) { { bucket: 'bucket', key: 'key' } } + + describe '#initialize' do + it 'constructs a default s3 client when none provided' do + client = double('client') + expect(S3::Client).to receive(:new).and_return(client) + + uploader = MultipartStreamUploader.new + expect(uploader.client).to be(client) + end + + it 'sets default configuration values when none provided' do + expect(subject.instance_variable_get(:@tempfile)).to be_nil + expect(subject.instance_variable_get(:@part_size)).to eq(MultipartStreamUploader::DEFAULT_PART_SIZE) + expect(subject.instance_variable_get(:@thread_count)).to eq(MultipartStreamUploader::DEFAULT_THREAD_COUNT) + end + + it 'sets provided configurations' do + ten_mb = 10 * 1024 * 1024 + subject = MultipartStreamUploader.new(client: client, tempfile: true, part_size: ten_mb, thread_count: 1) + + expect(subject.client).to be(client) + expect(subject.instance_variable_get(:@tempfile)).to be(true) + expect(subject.instance_variable_get(:@part_size)).to eq(ten_mb) + expect(subject.instance_variable_get(:@thread_count)).to eq(1) + end + end + + describe '#upload', :jruby_flaky do + let(:one_mb) { '.'
* 1024 * 1024 } + let(:ten_mb) { one_mb * 10 } + let(:seventeen_mb) { one_mb * 17 } + + it 'can upload empty stream' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag') + expected_params = params.merge( + upload_id: 'id', + multipart_upload: { parts: [{ etag: 'etag', part_number: 1 }] } + ) + expect(client).to receive(:complete_multipart_upload).with(expected_params).once + + subject.upload(params.merge(content_type: 'text/plain')) { |write_stream| write_stream << '' } + end + + it 'uses multipart APIs' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag') + expected_params = params.merge( + upload_id: 'id', + multipart_upload: { + parts: [ + { etag: 'etag', part_number: 1 }, + { etag: 'etag', part_number: 2 }, + { etag: 'etag', part_number: 3 }, + { etag: 'etag', part_number: 4 } + ] + } + ) + expect(client).to receive(:complete_multipart_upload).with(expected_params).once + + subject.upload(params.merge(content_type: 'text/plain')) { |write_stream| write_stream << seventeen_mb } + end + + it 'uploads the correct parts' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag') + 4.times.each do |p| + expect(client) + .to receive(:upload_part) + .with(params.merge(upload_id: 'id', body: instance_of(StringIO), part_number: p + 1)) + .once + .and_return(double(:upload_part, etag: 'etag')) + end + + subject.upload(params.merge(content_type: 'text/plain')) { |write_stream| write_stream << seventeen_mb } + end + + it 'uploads the correct parts when input is chunked' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag') + client.stub_responses(:complete_multipart_upload) + 4.times.each do |p| + expect(client) + .to receive(:upload_part) + .with(params.merge(upload_id: 'id', body: instance_of(StringIO), part_number: p + 1)) + .once + .and_return(double(:upload_part, etag: 'etag')) + end + + subject.upload(params) do |write_stream| + 17.times { write_stream << one_mb } + end + end + + it 'passes stringios with correct contents to upload_part' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag') + client.stub_responses(:complete_multipart_upload) + result = [] + mutex = Mutex.new + allow(client).to receive(:upload_part) do |part| + mutex.synchronize { result << [part[:part_number], part[:body].read.size] } + end.and_return(double(:upload_part, etag: 'etag')) + + subject.upload(params) do |write_stream| + 17.times { write_stream << one_mb } + end + + expect(result.sort_by(&:first)).to eq( + [ + [1, 5 * 1024 * 1024], + [2, 5 * 1024 * 1024], + [3, 5 * 1024 * 1024], + [4, 2 * 1024 * 1024] + ] + ) + end + + it 'automatically deletes failed multipart upload on part processing error' do + client.stub_responses( + :upload_part, + [ + { etag: 'etag-1' }, + { etag: 'etag-2' }, + RuntimeError.new('part 3 failed'), + { etag: 'etag-4' } + ] + ) + expect(client).to receive(:abort_multipart_upload).with(params.merge(upload_id: 'MultipartUploadId')) + + expect do + subject.upload(params) do |write_stream| + write_stream << seventeen_mb + rescue Errno::EPIPE + # ignore + end + end.to raise_error(MultipartUploadError, /multipart upload failed: part 3 failed/) + end + + it 'automatically deletes failed multipart upload on stream read error' do + expect(client).to 
receive(:abort_multipart_upload).with(params.merge(upload_id: 'MultipartUploadId')) + + expect do + subject.upload(params) do |_write_stream| + raise 'something went wrong' + end + end.to raise_error(/something went wrong/) + end + + it 'reports when it is unable to abort a failed multipart upload' do + client.stub_responses( + :upload_part, + [ + { etag: 'etag-1' }, + { etag: 'etag-2' }, + { etag: 'etag-3' }, + RuntimeError.new('part failed') + ] + ) + client.stub_responses( + :abort_multipart_upload, + [RuntimeError.new('network-error')] + ) + expect do + subject.upload(params) { |write_stream| write_stream << seventeen_mb } + end.to raise_error(S3::MultipartUploadError, /failed to abort multipart upload: network-error/) + end + + context 'when tempfile is true' do + let(:subject) { MultipartStreamUploader.new(client: client, tempfile: true) } + + it 'uses multipart APIs' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag') + expected_params = params.merge( + upload_id: 'id', + multipart_upload: { + parts: [ + { etag: 'etag', part_number: 1 }, + { etag: 'etag', part_number: 2 }, + { etag: 'etag', part_number: 3 }, + { etag: 'etag', part_number: 4 } + ] + } + ) + expect(client).to receive(:complete_multipart_upload).with(expected_params).once + subject.upload(params.merge(content_type: 'text/plain')) { |write_stream| write_stream << seventeen_mb } + end + + it 'uploads the correct parts' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag') + 4.times.each do |p| + expect(client) + .to receive(:upload_part) + .with(params.merge(upload_id: 'id', body: instance_of(Tempfile), part_number: p + 1)) + .once + .and_return(double(:upload_part, etag: 'etag')) + end + + subject.upload(params) { |write_stream| write_stream << seventeen_mb } + end + + it 'uploads the correct parts when input is chunked' do + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:complete_multipart_upload) + 4.times.each do |p| + expect(client) + .to receive(:upload_part) + .with(params.merge(upload_id: 'id', body: instance_of(Tempfile), part_number: p + 1)) + .once + .and_return(double(:upload_part, etag: 'etag')) + end + + subject.upload(params) do |write_stream| + 17.times { write_stream << one_mb } + end + end + + it 'automatically deletes failed multipart upload on part processing error' do + client.stub_responses( + :upload_part, + [ + { etag: 'etag-1' }, + { etag: 'etag-2' }, + RuntimeError.new('part 3 failed'), + { etag: 'etag-4' } + ] + ) + expect(client).to receive(:abort_multipart_upload).with(params.merge(upload_id: 'MultipartUploadId')) + + expect do + subject.upload(params.merge(tempfile: true)) do |write_stream| + write_stream << seventeen_mb + rescue Errno::EPIPE + # ignore + end + end.to raise_error(MultipartUploadError, /multipart upload failed: part 3 failed/) + end + + it 'automatically deletes failed multipart upload on stream read error' do + expect(client).to receive(:abort_multipart_upload).with(params.merge(upload_id: 'MultipartUploadId')) + + expect do + subject.upload(params) do |_write_stream| + raise 'something went wrong' + end + end.to raise_error(/something went wrong/) + end + + it 'reports when it is unable to abort a failed multipart upload' do + client.stub_responses( + :upload_part, + [ + { etag: 'etag-1' }, + { etag: 'etag-2' }, + { etag: 'etag-3' }, + RuntimeError.new('part failed') + ] + ) + 
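# The abort request below is stubbed to fail as well, so the raised error should report the abort failure alongside the original part failure. +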
client.stub_responses( + :abort_multipart_upload, + [RuntimeError.new('network-error')] + ) + expect do + subject.upload(params) { |write_stream| write_stream << seventeen_mb } + end.to raise_error(S3::MultipartUploadError, /failed to abort multipart upload: network-error/) + end + end + end + end + end +end diff --git a/gems/aws-sdk-s3/spec/object/upload_file_spec.rb b/gems/aws-sdk-s3/spec/object/upload_file_spec.rb index 442c65afa49..906241b5ed3 100644 --- a/gems/aws-sdk-s3/spec/object/upload_file_spec.rb +++ b/gems/aws-sdk-s3/spec/object/upload_file_spec.rb @@ -6,18 +6,10 @@ module Aws module S3 describe Object do - RSpec::Matchers.define :file_part do |expected| - match do |actual| - actual.source == expected[:source] && - actual.first_byte == expected[:offset] && - actual.last_byte == expected[:offset] + expected[:size] && - actual.size == expected[:size] - end - end - let(:client) { S3::Client.new(stub_responses: true) } describe '#upload_file' do + let(:expected_params) { { bucket: 'bucket', key: 'key' } } let(:one_meg) { 1024 * 1024 } let(:object) { S3::Object.new(bucket_name: 'bucket', key: 'key', client: client) } let(:one_mb) { '.' * 1024 * 1024 } @@ -45,7 +37,7 @@ module S3 it 'uploads objects with custom options without mutating them' do options = {}.freeze - expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: one_meg_file }) + expect(client).to receive(:put_object).with(expected_params.merge(body: one_meg_file)) object.upload_file(one_meg_file, options) end @@ -56,206 +48,32 @@ module S3 end end - context 'small objects' do - it 'uploads small objects using Client#put_object' do - expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: ten_meg_file }) - object.upload_file(ten_meg_file) - end - - it 'reports progress for small objects' do - expect(client) - .to receive(:put_object) - .with({ bucket: 'bucket', key: 'key', body: ten_meg_file, on_chunk_sent: instance_of(Proc) }) do |args| - args[:on_chunk_sent].call(ten_meg_file, ten_meg_file.size, ten_meg_file.size) - end - callback = proc do |bytes, totals| - expect(bytes).to eq([ten_meg_file.size]) - expect(totals).to eq([ten_meg_file.size]) - end - object.upload_file(ten_meg_file, progress_callback: callback) - end - - it 'accepts an alternative multipart file threshold' do - expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: one_hundred_seventeen_meg_file }) - object.upload_file(one_hundred_seventeen_meg_file, multipart_threshold: 200 * one_meg) - end - - it 'accepts paths to files to upload' do - file = double('file') - expect(File).to receive(:open).with(ten_meg_file.path, 'rb').and_yield(file) - expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: file }) - object.upload_file(ten_meg_file.path) - end - - it 'does not fail when given :thread_count' do - expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: ten_meg_file }) - object.upload_file(ten_meg_file, thread_count: 1) - end + it 'uploads a small object' do + expect(client).to receive(:put_object).with(expected_params.merge(body: ten_meg_file)) + object.upload_file(ten_meg_file) end - context 'large objects' do - it 'uses multipart APIs for objects >= 100MB' do - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag', checksum_crc32: 'checksum') - expect(client).to receive(:complete_multipart_upload).with( - bucket: 'bucket', - key: 'key', + it 'uploads a large 
object' do + expect(client).to receive(:complete_multipart_upload).with( + expected_params.merge( upload_id: 'id', multipart_upload: { parts: [ - { checksum_crc32: 'checksum', etag: 'etag', part_number: 1 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 2 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 3 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 4 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 5 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 6 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 7 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 8 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 9 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 10 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 11 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 12 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 13 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 14 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 15 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 16 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 17 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 18 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 19 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 20 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 21 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 22 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 23 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 24 } + { checksum_crc32: 'part', etag: 'etag', part_number: 1 }, + { checksum_crc32: 'part', etag: 'etag', part_number: 2 } ] }, - mpu_object_size: one_hundred_seventeen_meg_file.size + mpu_object_size: ten_meg_file.size ) - object.upload_file(one_hundred_seventeen_meg_file, content_type: 'text/plain') - end - - it 'allows for full object checksums' do - expect(client).to receive(:create_multipart_upload) - .with( - { - bucket: 'bucket', - key: 'key', - checksum_algorithm: 'CRC32', - checksum_type: 'FULL_OBJECT', - content_type: 'text/plain' - } - ).and_call_original - expect(client).to receive(:upload_part) - .with(hash_not_including(checksum_crc32: anything)) - .exactly(24).times - .and_call_original - expect(client).to receive(:complete_multipart_upload) - .with(hash_including(checksum_type: 'FULL_OBJECT', checksum_crc32: 'checksum')) - .and_call_original - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag', checksum_crc32: 'part') - - object.upload_file(one_hundred_seventeen_meg_file, content_type: 'text/plain', checksum_crc32: 'checksum') - end - - it 'reports progress for multipart uploads' do - thread = double(value: nil) - allow(Thread).to receive(:new).and_yield.and_return(thread) - - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - expect(client).to receive(:upload_part).exactly(24).times do |args| - args[:on_chunk_sent].call(args[:body], args[:body].size, args[:body].size) - double( - context: double(params: { checksum_algorithm: 'crc32' }), - checksum_crc32: 'checksum', - etag: 'etag' - ) - end - callback = proc do |bytes, totals| - expect(bytes.size).to eq(24) - expect(totals.size).to eq(24) - end - object.upload_file(one_hundred_seventeen_meg_file, content_type: 'text/plain', 
progress_callback: callback) - end - - it 'defaults to THREAD_COUNT without the thread_count option' do - expect(Thread).to receive(:new).exactly(S3::MultipartFileUploader::THREAD_COUNT).times.and_yield.and_return(double(value: nil)) - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - object.upload_file(one_hundred_seventeen_meg_file) - end - - it 'respects the thread_count option' do - custom_thread_count = 20 - expect(Thread).to receive(:new).exactly(custom_thread_count).times.and_yield.and_return(double(value: nil)) - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - object.upload_file(one_hundred_seventeen_meg_file, thread_count: custom_thread_count) - end - - it 'raises an error if the multipart threshold is too small' do - error_msg = 'unable to multipart upload files smaller than 5MB' - expect do - object.upload_file(one_meg_file, multipart_threshold: one_meg) - end.to raise_error(ArgumentError, error_msg) - end - - it 'automatically deletes failed multipart upload on error' do - allow_any_instance_of(FilePart).to receive(:read).and_return(nil) - - client.stub_responses( - :upload_part, - [ - { etag: 'etag-1' }, - { etag: 'etag-2' }, - RuntimeError.new('part 3 failed'), - { etag: 'etag-4' } - ] - ) - - expect(client).to receive(:abort_multipart_upload).with( - bucket: 'bucket', - key: 'key', - upload_id: 'MultipartUploadId' - ) - - expect do - object.upload_file(one_hundred_seventeen_meg_file) - end.to raise_error(/multipart upload failed: part 3 failed/) - end - - it 'reports when it is unable to abort a failed multipart upload' do - allow(Thread).to receive(:new) do |_, &block| - double(value: block.call) - end - - client.stub_responses( - :upload_part, - [ - { etag: 'etag-1' }, - { etag: 'etag-2' }, - { etag: 'etag-3' }, - RuntimeError.new('part failed') - ] - ) - client.stub_responses(:abort_multipart_upload, [RuntimeError.new('network-error')]) - expect { object.upload_file(one_hundred_seventeen_meg_file) }.to raise_error( - S3::MultipartUploadError, - /failed to abort multipart upload: network-error. 
Multipart upload failed: part failed/ - ) - end - - it 'aborts multipart upload when upload fails to complete' do - client.stub_responses(:complete_multipart_upload, RuntimeError.new('network-error')) + ) + client.stub_responses(:create_multipart_upload, upload_id: 'id') + client.stub_responses(:upload_part, etag: 'etag', checksum_crc32: 'part') + object.upload_file(ten_meg_file, multipart_threshold: 5 * one_meg) + end - expect(client).to receive(:abort_multipart_upload).with( - bucket: 'bucket', - key: 'key', - upload_id: 'MultipartUploadId' - ) - expect { object.upload_file(one_hundred_seventeen_meg_file) }.to raise_error(Aws::S3::MultipartUploadError) - end + it 'accepts an alternative multipart file threshold' do + expect(client).to receive(:put_object).with(expected_params.merge(body: one_hundred_seventeen_meg_file)) + object.upload_file(one_hundred_seventeen_meg_file, multipart_threshold: 200 * one_meg) end end end diff --git a/gems/aws-sdk-s3/spec/object/upload_stream_spec.rb b/gems/aws-sdk-s3/spec/object/upload_stream_spec.rb index 8b4c1b56d1b..3597d497b70 100644 --- a/gems/aws-sdk-s3/spec/object/upload_stream_spec.rb +++ b/gems/aws-sdk-s3/spec/object/upload_stream_spec.rb @@ -9,27 +9,12 @@ module S3 let(:client) { S3::Client.new(stub_responses: true) } describe '#upload_stream', :jruby_flaky do - let(:object) do - S3::Object.new( - bucket_name: 'bucket', - key: 'key', - client: client - ) - end - - let(:zero_mb) { '' } - + let(:object) { S3::Object.new(bucket_name: 'bucket', key: 'key', client: client) } + let(:params) { { bucket: 'bucket', key: 'key' } } let(:one_mb) { '.' * 1024 * 1024 } + let(:seventeen_mb) { one_mb * 17 } - let(:ten_mb) do - one_mb * 10 - end - - let(:seventeen_mb) do - one_mb * 17 - end - - it 'can upload empty stream' do + it 'uploads stream' do client.stub_responses(:create_multipart_upload, upload_id: 'id') client.stub_responses(:upload_part, etag: 'etag') expect(client).to receive(:complete_multipart_upload).with( @@ -38,13 +23,14 @@ module S3 upload_id: 'id', multipart_upload: { parts: [ - { etag: 'etag', part_number: 1 } + { etag: 'etag', part_number: 1 }, + { etag: 'etag', part_number: 2 }, + { etag: 'etag', part_number: 3 }, + { etag: 'etag', part_number: 4 } ] } ).once - object.upload_stream(content_type: 'text/plain') do |write_stream| - write_stream << zero_mb - end + object.upload_stream(content_type: 'text/plain') { |write_stream| write_stream << seventeen_mb } end it 'respects the thread_count option' do @@ -55,123 +41,36 @@ module S3 object.upload_stream(thread_count: custom_thread_count) { |_write_stream| } end - it 'uses multipart APIs' do + it 'respects the tempfile option' do client.stub_responses(:create_multipart_upload, upload_id: 'id') client.stub_responses(:upload_part, etag: 'etag') expect(client).to receive(:complete_multipart_upload).with( - bucket: 'bucket', - key: 'key', - upload_id: 'id', - multipart_upload: { - parts: [ - { etag: 'etag', part_number: 1 }, - { etag: 'etag', part_number: 2 }, - { etag: 'etag', part_number: 3 }, - { etag: 'etag', part_number: 4 } - ] - } + params.merge( + upload_id: 'id', + multipart_upload: { + parts: [ + { etag: 'etag', part_number: 1 }, + { etag: 'etag', part_number: 2 }, + { etag: 'etag', part_number: 3 }, + { etag: 'etag', part_number: 4 } + ] + } + ) ).once - object.upload_stream(content_type: 'text/plain') do |write_stream| - write_stream << seventeen_mb - end + object.upload_stream(tempfile: true) { |write_stream| write_stream << seventeen_mb } end - it 'uploads the correct parts' do - 
client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 1 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 2 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 3 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 4 - }).once.and_return(double(:upload_part, etag: 'etag')) - object.upload_stream do |write_stream| - write_stream << seventeen_mb - end - end - - it 'uploads the correct parts when input is chunked' do + it 'uploads correct parts when chunked with custom part_size' do client.stub_responses(:create_multipart_upload, upload_id: 'id') client.stub_responses(:complete_multipart_upload) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 1 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 2 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 3 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 4 - }).once.and_return(double(:upload_part, etag: 'etag')) - object.upload_stream do |write_stream| - 17.times { write_stream << one_mb } + 3.times.each do |p| + expect(client) + .to receive(:upload_part) + .with(params.merge(upload_id: 'id', body: instance_of(StringIO), part_number: p + 1)) + .once + .and_return(double(:upload_part, etag: 'etag')) end - end - it 'uploads correct parts when chunked with custom part_size' do - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 1 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 2 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(StringIO), - part_number: 3 - }).once.and_return(double(:upload_part, etag: 'etag')) object.upload_stream(part_size: 7 * 1024 * 1024) do |write_stream| 17.times { write_stream << one_mb } end @@ -183,13 +82,9 @@ module S3 result = [] mutex = Mutex.new allow(client).to receive(:upload_part) do |part| - mutex.synchronize do - result << [ - part[:part_number], - part[:body].read.size - ] - end + mutex.synchronize { result << 
[part[:part_number], part[:body].read.size] } end.and_return(double(:upload_part, etag: 'etag')) + object.upload_stream(part_size: 7 * 1024 * 1024) do |write_stream| 17.times { write_stream << one_mb } end @@ -201,278 +96,6 @@ module S3 ] ) end - - it 'passes stringios with correct contents to upload_part' do - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - result = [] - mutex = Mutex.new - allow(client).to receive(:upload_part) do |part| - mutex.synchronize do - result << [ - part[:part_number], - part[:body].read.size - ] - end - end.and_return(double(:upload_part, etag: 'etag')) - object.upload_stream do |write_stream| - 17.times { write_stream << one_mb } - end - expect(result.sort_by(&:first)).to eq( - [ - [1, 5 * 1024 * 1024], - [2, 5 * 1024 * 1024], - [3, 5 * 1024 * 1024], - [4, 2 * 1024 * 1024] - ] - ) - end - - it 'automatically deletes failed multipart upload on part processing error' do - client.stub_responses( - :upload_part, [ - { etag: 'etag-1' }, - { etag: 'etag-2' }, - RuntimeError.new('part 3 failed'), - { etag: 'etag-4' } - ] - ) - - expect(client).to receive(:abort_multipart_upload) - .with(bucket: 'bucket', key: 'key', upload_id: 'MultipartUploadId') - - expect do - object.upload_stream do |write_stream| - begin - write_stream << seventeen_mb - rescue Errno::EPIPE - end - end - end.to raise_error('multipart upload failed: part 3 failed') - end - - it 'automatically deletes failed multipart upload on stream read error' do - expect(client).to receive(:abort_multipart_upload) - .with(bucket: 'bucket', key: 'key', upload_id: 'MultipartUploadId') - - expect do - object.upload_stream do |_write_stream| - raise 'something went wrong' - end - end.to raise_error(/something went wrong/) - end - - it 'reports when it is unable to abort a failed multipart upload' do - client.stub_responses( - :upload_part, - [ - { etag: 'etag-1' }, - { etag: 'etag-2' }, - { etag: 'etag-3' }, - RuntimeError.new('part failed') - ] - ) - client.stub_responses( - :abort_multipart_upload, - [ - RuntimeError.new('network-error') - ] - ) - expect do - object.upload_stream do |write_stream| - write_stream << seventeen_mb - end - end.to raise_error( - S3::MultipartUploadError, - /failed to abort multipart upload: network-error/ - ) - end - - context 'with tempfile option' do - it 'uses multipart APIs' do - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag') - expect(client).to receive(:complete_multipart_upload).with( - bucket: 'bucket', - key: 'key', - upload_id: 'id', - multipart_upload: { - parts: [ - { etag: 'etag', part_number: 1 }, - { etag: 'etag', part_number: 2 }, - { etag: 'etag', part_number: 3 }, - { etag: 'etag', part_number: 4 } - ] - } - ).once - object.upload_stream( - content_type: 'text/plain', - tempfile: true - ) do |write_stream| - write_stream << seventeen_mb - end - end - - it 'uploads the correct parts' do - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(Tempfile), - part_number: 1 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(Tempfile), - part_number: 2 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to 
receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(Tempfile), - part_number: 3 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(Tempfile), - part_number: 4 - }).once.and_return(double(:upload_part, etag: 'etag')) - object.upload_stream(tempfile: true) do |write_stream| - write_stream << seventeen_mb - end - end - - it 'uploads the correct parts when input is chunked' do - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(Tempfile), - part_number: 1 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(Tempfile), - part_number: 2 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(Tempfile), - part_number: 3 - }).once.and_return(double(:upload_part, etag: 'etag')) - expect(client).to receive(:upload_part).with({ - bucket: 'bucket', - key: 'key', - upload_id: 'id', - body: instance_of(Tempfile), - part_number: 4 - }).once.and_return(double(:upload_part, etag: 'etag')) - object.upload_stream(tempfile: true) do |write_stream| - 17.times { write_stream << one_mb } - end - end - - it 'passes tempfiles with correct contents to upload_part' do - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:complete_multipart_upload) - result = [] - mutex = Mutex.new - allow(client).to receive(:upload_part) do |part| - mutex.synchronize do - result << [ - part[:part_number], - part[:body].read.size - ] - end - end.and_return(double(:upload_part, etag: 'etag')) - object.upload_stream(tempfile: true) do |write_stream| - 17.times { write_stream << one_mb } - end - expect(result.sort_by(&:first)).to eq( - [ - [1, 5 * 1024 * 1024], - [2, 5 * 1024 * 1024], - [3, 5 * 1024 * 1024], - [4, 2 * 1024 * 1024] - ] - ) - end - - it 'automatically deletes failed multipart upload on part processing error' do - client.stub_responses( - :upload_part, - [ - { etag: 'etag-1' }, - { etag: 'etag-2' }, - RuntimeError.new('part 3 failed'), - { etag: 'etag-4' } - ] - ) - - expect(client).to receive(:abort_multipart_upload).with( - bucket: 'bucket', key: 'key', upload_id: 'MultipartUploadId' - ) - - expect do - object.upload_stream(tempfile: true) do |write_stream| - begin - write_stream << seventeen_mb - rescue Errno::EPIPE - end - end - end.to raise_error('multipart upload failed: part 3 failed') - end - - it 'automatically deletes failed multipart upload on stream read error' do - expect(client).to receive(:abort_multipart_upload).with( - bucket: 'bucket', key: 'key', upload_id: 'MultipartUploadId' - ) - - expect do - object.upload_stream(tempfile: true) do |_write_stream| - raise 'something went wrong' - end - end.to raise_error(/multipart upload failed/, /something went wrong/) - end - - it 'reports when it is unable to abort a failed multipart upload' do - client.stub_responses( - :upload_part, - [ - { etag: 'etag-1' }, - { etag: 'etag-2' }, - { etag: 'etag-3' }, - RuntimeError.new('part failed') - ] - ) - client.stub_responses( - :abort_multipart_upload, - 
[RuntimeError.new('network-error')] - ) - expect do - object.upload_stream(tempfile: true) do |write_stream| - write_stream << seventeen_mb - end - end.to raise_error( - S3::MultipartUploadError, - 'failed to abort multipart upload: network-error. '\ - 'Multipart upload failed: part failed' - ) - end - end end end end From f847baa13b6c3703eb4cdf347d63288b95cd7b8b Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Wed, 13 Aug 2025 13:59:16 -0700 Subject: [PATCH 15/27] Update specs for file downloader --- gems/aws-sdk-s3/spec/file_downloader_spec.rb | 254 ++++++++++++++++++ .../spec/object/download_file_spec.rb | 217 ++------------- 2 files changed, 270 insertions(+), 201 deletions(-) create mode 100644 gems/aws-sdk-s3/spec/file_downloader_spec.rb diff --git a/gems/aws-sdk-s3/spec/file_downloader_spec.rb b/gems/aws-sdk-s3/spec/file_downloader_spec.rb new file mode 100644 index 00000000000..619a5a73496 --- /dev/null +++ b/gems/aws-sdk-s3/spec/file_downloader_spec.rb @@ -0,0 +1,254 @@ +# frozen_string_literal: true + +require_relative 'spec_helper' +require 'tempfile' + +module Aws + module S3 + describe FileDownloader do + let(:client) { S3::Client.new(stub_responses: true) } + let(:subject) { FileDownloader.new(client: client) } + let(:tmpdir) { Dir.tmpdir } + + describe '#initialize' do + it 'constructs a default s3 client when not given' do + client = double('client') + expect(S3::Client).to receive(:new).and_return(client) + + downloader = FileDownloader.new + expect(downloader.client).to be(client) + end + end + + describe '#download', :jruby_flaky do + let(:path) { Tempfile.new('destination').path } + let(:one_meg) { 1024 * 1024 } + + before(:each) do + allow(Dir).to receive(:tmpdir).and_return(tmpdir) + client.stub_responses(:head_object, lambda { |context| + case context.params[:key] + when 'single' + { content_length: one_meg, parts_count: nil } + when 'parts' + resp = { content_length: 20 * one_meg, parts_count: nil } + resp[:parts_count] = 4 if context.params[:part_number] + resp + when 'range' + { content_length: 15 * one_meg, parts_count: nil } + end + }) + end + + it 'downloads a single object using Client#get_object' do + params = { bucket: 'foo', key: 'single' } + expect(client).to receive(:get_object).with(params.merge(response_target: path)).exactly(1).times + subject.download(path, params) + end + + it 'downloads a large object in parts' do + parts = 0 + client.stub_responses(:get_object, lambda do |_ctx| + parts += 1 + { body: 'body', content_range: 'bytes 0-3/4' } + end) + subject.download(path, bucket: 'foo', key: 'parts') + expect(parts).to eq(4) + end + + it 'downloads a large object in ranges' do + client.stub_responses(:get_object, lambda { |context| + responses = { + 'bytes=0-5242879' => { body: 'body', content_range: 'bytes 0-5242879/15728640' }, + 'bytes=5242880-10485759' => { body: 'body', content_range: 'bytes 5242880-10485759/15728640' }, + 'bytes=10485760-15728639' => { body: 'body', content_range: 'bytes 10485760-15728639/15728640' } + } + responses[context.params[:range]] + }) + subject.download(path, bucket: 'foo', key: 'range', chunk_size: 5 * one_meg, mode: 'get_range') + end + + it 'supports download object with version_id' do + params = { bucket: 'foo', key: 'single', version_id: 'foo' } + expect(client).to receive(:get_object).with(params.merge(response_target: path)).exactly(1).times + + subject.download(path, params) + end + + it 'calls on_checksum_validated on single object' do + client.stub_responses(:get_object, { body: 'body', checksum_sha1: 
'Agg/RXngimEkJcDBoX7ket14O5Q=' }) + callback_data = { called: 0 } + mutex = Mutex.new + callback = proc do |_alg, _resp| + mutex.synchronize { callback_data[:called] += 1 } + end + + subject.download(path, bucket: 'foo', key: 'single', on_checksum_validated: callback) + expect(callback_data[:called]).to eq(1) + end + + it 'calls on_checksum_validated on multipart object' do + callback_data = { called: 0 } + client.stub_responses( + :get_object, { body: 'body', content_range: 'bytes 0-3/4', checksum_sha1: 'Agg/RXngimEkJcDBoX7ket14O5Q=' } + ) + mutex = Mutex.new + callback = proc do |_alg, _resp| + mutex.synchronize { callback_data[:called] += 1 } + end + + subject.download(path, bucket: 'foo', key: 'parts', on_checksum_validated: callback) + expect(callback_data[:called]).to eq(4) + end + + it 'supports disabling checksum_mode' do + client.stub_responses(:head_object, lambda { |context| + expect(context.params[:checksum_mode]).to eq('DISABLED') + { content_length: one_meg, parts_count: nil } + }) + client.stub_responses(:get_object, lambda { |context| + expect(context.params[:checksum_mode]).to eq('DISABLED') + { body: 'body' } + }) + + subject.download(path, bucket: 'foo', key: 'single', checksum_mode: 'DISABLED') + end + + it 'downloads the file in range chunks' do + client.stub_responses(:get_object, lambda { |context| + ranges = context.params[:range].match(/bytes=(?\d+)-(?\d+)/) + expect(ranges[:end].to_i - ranges[:start].to_i + 1).to eq(one_meg) + { content_range: "bytes #{ranges[:start]}-#{ranges[:end]}/#{20 * one_meg}" } + }) + + subject.download(path, bucket: 'foo', key: 'range', chunk_size: one_meg) + end + + context 'multipart progress' do + it 'reports progress for single object' do + params = { bucket: 'foo', key: 'single' } + small_file_size = 1024 + expect(client) + .to receive(:get_object) + .with(params.merge(response_target: path, on_chunk_received: instance_of(Proc))) do |args| + args[:on_chunk_received].call(Tempfile.new('small-file'), small_file_size, small_file_size) + end + + n_calls = 0 + callback = proc do |bytes, part_sizes, total| + expect(bytes).to eq([small_file_size]) + expect(part_sizes).to eq([small_file_size]) + expect(total).to eq(small_file_size) + n_calls += 1 + end + + subject.download(path, params.merge(progress_callback: callback)) + expect(n_calls).to eq(1) + end + + it 'reports progress for downloading a large object in parts' do + expect(client).to receive(:get_object).exactly(4).times do |args| + args[:on_chunk_received].call(Tempfile.new('large-file'), 4, 4) + client.stub_data(:get_object, body: StringIO.new('chunk'), content_range: 'bytes 0-3/4') + end + + n_calls = 0 + mutex = Mutex.new + callback = proc do |bytes, part_sizes, total| + mutex.synchronize do + expect(bytes.size).to eq(4) + expect(part_sizes.size).to eq(4) + expect(total).to eq(20 * one_meg) + n_calls += 1 + end + end + subject.download(path, bucket: 'foo', key: 'parts', progress_callback: callback) + expect(n_calls).to eq(4) + end + end + + context 'error handling' do + it 'raises when checksum validation fails on single object' do + client.stub_responses(:get_object, { body: 'body', checksum_sha1: 'invalid' }) + expect { subject.download(path, bucket: 'foo', key: 'single') }.to raise_error(Aws::Errors::ChecksumError) + end + + it 'raises when checksum validation fails on multipart object' do + client.stub_responses(:get_object, { body: 'body', checksum_sha1: 'invalid' }) + expect(Thread).to receive(:new).and_yield.and_return(double(value: nil)) + expect { subject.download(path, 
bucket: 'foo', key: 'parts') }.to raise_error(Aws::Errors::ChecksumError) + end + + it 'raises when ETAG does not match during multipart get by ranges' do + client.stub_responses(:head_object, content_length: 15 * one_meg, parts_count: nil, etag: 'test-etag') + client.stub_responses(:get_object, lambda { |ctx| + expect(ctx.params[:if_match]).to eq('test-etag') + 'PreconditionFailed' + }) + expect(Thread).to receive(:new).and_yield.and_return(double(value: nil)) + expect { subject.download(path, bucket: 'foo', key: 'range', chunk_size: one_meg, mode: 'get_range') } + .to raise_error(Aws::S3::Errors::PreconditionFailed) + end + + it 'raises when ETAG does not match during multipart get by parts' do + client.stub_responses(:head_object, { content_length: 20 * one_meg, etag: 'test-etag', parts_count: 4 }) + client.stub_responses(:get_object, lambda { |ctx| + expect(ctx.params[:if_match]).to eq('test-etag') + 'PreconditionFailed' + }) + + expect(Thread).to receive(:new).and_yield.and_return(double(value: nil)) + expect { subject.download(path, bucket: 'foo', key: 'parts') } + .to raise_error(Aws::S3::Errors::PreconditionFailed) + end + + it 'raises when given an invalid mode' do + expect { subject.download(path, bucket: 'foo', key: 'parts', mode: 'invalid_mode') } + .to raise_error(ArgumentError, /Invalid mode invalid_mode provided/) + end + + it 'raises when given a "get_range" mode without :chunk_size' do + expect { subject.download(path, bucket: 'foo', key: 'range', mode: 'get_range') } + .to raise_error(ArgumentError, /In get_range mode, :chunk_size must be provided/) + end + + it 'raises when given :chunk_size is larger than file size' do + expect { subject.download(path, bucket: 'foo', key: 'parts', chunk_size: 50 * one_meg) } + .to raise_error(ArgumentError, /:chunk_size shouldn't exceed total file size/) + end + + it 'raises when :on_checksum_validated is not callable' do + expect { subject.download(path, bucket: 'foo', key: 'parts', on_checksum_validated: 'string') } + .to raise_error(ArgumentError, /:on_checksum_validated must be callable/) + end + + it 'raises when range validation fails' do + client.stub_responses(:get_object, { body: 'body', content_range: 'bytes 0-3/4' }) + expect(Thread).to receive(:new).and_yield.and_return(double(value: nil)) + expect { subject.download(path, bucket: 'foo', key: 'parts', mode: 'get_range', chunk_size: one_meg) } + .to raise_error(Aws::S3::MultipartDownloadError) + end + + it 'does not overwrite existing file when download fails' do + File.write(path, 'existing content') + + client.stub_responses(:get_object, lambda { |context| + responses = { + 'bytes=0-5242879' => { body: 'body', content_range: 'bytes 0-5242879/15728640' }, + 'bytes=5242880-10485759' => { body: 'body', content_range: 'bytes 5242880-10485759/15728640' }, + 'bytes=10485760-15728639' => { body: 'fake-range', content_range: 'bytes 10485800-15728639/15728640' } + } + responses[context.params[:range]] + }) + + expect(Thread).to receive(:new).and_yield.and_return(double(value: nil)) + expect { subject.download(path, bucket: 'foo', key: 'range', chunk_size: 5 * one_meg, mode: 'get_range') } + .to raise_error(Aws::S3::MultipartDownloadError) + expect(File.exist?(path)).to be(true) + expect(File.read(path)).to eq('existing content') + end + end + end + end + end +end diff --git a/gems/aws-sdk-s3/spec/object/download_file_spec.rb b/gems/aws-sdk-s3/spec/object/download_file_spec.rb index b6bd29aec92..1e7fa6f2bfb 100644 --- a/gems/aws-sdk-s3/spec/object/download_file_spec.rb +++ 
b/gems/aws-sdk-s3/spec/object/download_file_spec.rb @@ -11,44 +11,36 @@ module S3 describe '#download_file', :jruby_flaky do let(:path) { Tempfile.new('destination').path } - let(:small_obj) { S3::Object.new(bucket_name: 'bucket', key: 'small', client: client) } - let(:large_obj) { S3::Object.new(bucket_name: 'bucket', key: 'large', client: client) } - let(:single_obj) { S3::Object.new(bucket_name: 'bucket', key: 'single', client: client) } - let(:small_obj_params) { { bucket: 'bucket', key: 'small', response_target: path } } let(:one_meg) { 1024 * 1024 } before(:each) do allow(Dir).to receive(:tmpdir).and_return(tmpdir) - client.stub_responses(:head_object, lambda { |context| - case context.params[:key] - when 'small' - { content_length: one_meg, parts_count: nil } - when 'large' - resp = { content_length: 20 * one_meg, parts_count: nil } - resp[:parts_count] = 4 if context.params[:part_number] - resp - when 'single' - { content_length: 15 * one_meg, parts_count: nil } - end - }) end - it 'downloads single part files in Client#get_object' do - expect(client).to receive(:get_object).with(small_obj_params).exactly(1).times - small_obj.download_file(path) + it 'downloads a single file' do + client.stub_responses(:head_object, { content_length: one_meg, parts_count: nil }) + expected_params = { bucket: 'bucket', key: 'small', response_target: path } + expect(client).to receive(:get_object).with(expected_params).exactly(1).times + + object = S3::Object.new(bucket_name: 'bucket', key: 'small', client: client) + object.download_file(path) end - it 'download larger files in parts' do + it 'downloads a large file in parts' do parts = 0 + client.stub_responses(:head_object, { content_length: 20 * one_meg, parts_count: 4 }) client.stub_responses(:get_object, lambda { |_ctx| parts += 1 { body: 'body', content_range: 'bytes 0-3/4' } }) - large_obj.download_file(path) + + object = S3::Object.new(bucket_name: 'bucket', key: 'large', client: client) + object.download_file(path) expect(parts).to eq(4) end - it 'download larger files in ranges' do + it 'downloads a large file in ranges' do + client.stub_responses(:head_object, { content_length: 15 * one_meg, parts_count: nil }) client.stub_responses(:get_object, lambda { |context| responses = { 'bytes=0-5242879' => { body: 'body', content_range: 'bytes 0-5242879/15728640' }, @@ -57,186 +49,9 @@ module S3 } responses[context.params[:range]] }) - single_obj.download_file(path, chunk_size: 5 * one_meg, mode: 'get_range') - end - - it 'supports download object with version_id' do - expect(client).to receive(:get_object).with(small_obj_params.merge(version_id: 'foo')).exactly(1).times - small_obj.download_file(path, version_id: 'foo') - end - - it 'calls on_checksum_validated on single part' do - client.stub_responses(:get_object, { body: 'body', checksum_sha1: 'Agg/RXngimEkJcDBoX7ket14O5Q=' }) - callback_data = { called: 0 } - mutex = Mutex.new - callback = proc do |_alg, _resp| - mutex.synchronize { callback_data[:called] += 1 } - end - - small_obj.download_file(path, on_checksum_validated: callback) - expect(callback_data[:called]).to eq(1) - end - - it 'calls on_checksum_validated on multipart' do - callback_data = { called: 0 } - client.stub_responses( - :get_object, { body: 'body', content_range: 'bytes 0-3/4', checksum_sha1: 'Agg/RXngimEkJcDBoX7ket14O5Q=' } - ) - mutex = Mutex.new - callback = proc do |_alg, _resp| - mutex.synchronize { callback_data[:called] += 1 } - end - - large_obj.download_file(path, on_checksum_validated: callback) - 
expect(callback_data[:called]).to eq(4) - end - - it 'supports disabling checksum_mode' do - client.stub_responses(:head_object, lambda { |context| - expect(context.params[:checksum_mode]).to eq('DISABLED') - { content_length: one_meg, parts_count: nil } - }) - client.stub_responses(:get_object, lambda { |context| - expect(context.params[:checksum_mode]).to eq('DISABLED') - { body: 'body' } - }) - - small_obj.download_file(path, checksum_mode: 'DISABLED') - end - - it 'downloads the file in range chunks' do - client.stub_responses(:get_object, lambda { |context| - ranges = context.params[:range].match(/bytes=(?\d+)-(?\d+)/) - expect(ranges[:end].to_i - ranges[:start].to_i + 1).to eq(one_meg) - { content_range: "bytes #{ranges[:start]}-#{ranges[:end]}/#{20 * one_meg}" } - }) - - large_obj.download_file(path, chunk_size: one_meg) - end - - context 'multipart progress' do - it 'reports progress for single part objects' do - small_file_size = 1024 - expect(client) - .to receive(:get_object) - .with(small_obj_params.merge(on_chunk_received: instance_of(Proc))) do |args| - args[:on_chunk_received].call(Tempfile.new('small-file'), small_file_size, small_file_size) - end - - n_calls = 0 - callback = proc do |bytes, part_sizes, total| - expect(bytes).to eq([small_file_size]) - expect(part_sizes).to eq([small_file_size]) - expect(total).to eq(small_file_size) - n_calls += 1 - end - - small_obj.download_file(path, progress_callback: callback) - expect(n_calls).to eq(1) - end - - it 'reports progress for files downloaded in parts' do - expect(client).to receive(:get_object).exactly(4).times do |args| - args[:on_chunk_received].call(Tempfile.new('large-file'), 4, 4) - client.stub_data(:get_object, body: StringIO.new('chunk'), content_range: 'bytes 0-3/4') - end - - n_calls = 0 - mutex = Mutex.new - callback = proc do |bytes, part_sizes, total| - mutex.synchronize do - expect(bytes.size).to eq(4) - expect(part_sizes.size).to eq(4) - expect(total).to eq(20 * one_meg) - n_calls += 1 - end - end - large_obj.download_file(path, progress_callback: callback) - expect(n_calls).to eq(4) - end - end - - context 'error handling' do - it 'raises an error when checksum validation fails on single part' do - client.stub_responses(:get_object, { body: 'body', checksum_sha1: 'invalid' }) - - expect { small_obj.download_file(path) }.to raise_error(Aws::Errors::ChecksumError) - end - - it 'raises an error when checksum validation fails on multipart' do - client.stub_responses(:get_object, { body: 'body', checksum_sha1: 'invalid' }) - thread = double(value: nil) - - expect(Thread).to receive(:new).and_yield.and_return(thread) - expect { large_obj.download_file(path) }.to raise_error(Aws::Errors::ChecksumError) - end - - it 'does not download object when ETAG does not match during multipart get by ranges' do - client.stub_responses(:head_object, content_length: 15 * one_meg, parts_count: nil, etag: 'test-etag') - client.stub_responses(:get_object, lambda { |ctx| - expect(ctx.params[:if_match]).to eq('test-etag') - 'PreconditionFailed' - }) - thread = double(value: nil) - expect(Thread).to receive(:new).and_yield.and_return(thread) - expect { single_obj.download_file(path) }.to raise_error(Aws::S3::Errors::PreconditionFailed) - end - - it 'does not download object when ETAG does not match during multipart get by parts' do - client.stub_responses(:head_object, { content_length: 20 * one_meg, etag: 'test-etag', parts_count: 4 }) - client.stub_responses(:get_object, lambda { |ctx| - expect(ctx.params[:if_match]).to 
eq('test-etag') - 'PreconditionFailed' - }) - - thread = double(value: nil) - expect(Thread).to receive(:new).and_yield.and_return(thread) - expect { large_obj.download_file(path) }.to raise_error(Aws::S3::Errors::PreconditionFailed) - end - - it 'raises an error if an invalid mode is specified' do - expect { large_obj.download_file(path, mode: 'invalid_mode') } - .to raise_error(ArgumentError, /Invalid mode invalid_mode provided/) - end - - it 'raises an error if choose :get_range without :chunk_size' do - expect { large_obj.download_file(path, mode: 'get_range') } - .to raise_error(ArgumentError, 'In get_range mode, :chunk_size must be provided') - end - - it 'raises an error if :chunk_size is larger than file size' do - expect { large_obj.download_file(path, chunk_size: 50 * one_meg) } - .to raise_error(ArgumentError, ":chunk_size shouldn't exceed total file size.") - end - - it 'raises an error if :on_checksum_validated is not callable' do - expect { large_obj.download_file(path, on_checksum_validated: 'string') } - .to raise_error(ArgumentError, ':on_checksum_validated must be callable') - end - - it 'raises error when range validation fails' do - client.stub_responses(:get_object, { body: 'body', content_range: 'bytes 0-3/4' }) - expect { large_obj.download_file(path, mode: 'get_range', chunk_size: one_meg) } - .to raise_error(Aws::S3::MultipartDownloadError) - end - - it 'does not overwrite existing file when download fails' do - File.write(path, 'existing content') - - client.stub_responses(:get_object, lambda { |context| - responses = { - 'bytes=0-5242879' => { body: 'body', content_range: 'bytes 0-5242879/15728640' }, - 'bytes=5242880-10485759' => { body: 'body', content_range: 'bytes 5242880-10485759/15728640' }, - 'bytes=10485760-15728639' => { body: 'fake-range', content_range: 'bytes 10485800-15728639/15728640' } - } - responses[context.params[:range]] - }) - expect { single_obj.download_file(path, chunk_size: 5 * one_meg, mode: 'get_range') } - .to raise_error(Aws::S3::MultipartDownloadError) - expect(File.exist?(path)).to be(true) - expect(File.read(path)).to eq('existing content') - end + object = S3::Object.new(bucket_name: 'bucket', key: 'single', client: client) + object.download_file(path, chunk_size: 5 * one_meg, mode: 'get_range') end end end From dfde65f064e31b69c2e98242b0f9f333f326a4f5 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Thu, 14 Aug 2025 10:27:39 -0700 Subject: [PATCH 16/27] Streamline specs --- gems/aws-sdk-s3/spec/file_downloader_spec.rb | 50 ++++---- gems/aws-sdk-s3/spec/file_uploader_spec.rb | 38 +++--- .../spec/multipart_file_uploader_spec.rb | 112 +++++++----------- .../spec/multipart_stream_uploader_spec.rb | 25 ++-- .../spec/object/download_file_spec.rb | 53 +++------ .../spec/object/upload_file_spec.rb | 65 ++++------ .../spec/object/upload_stream_spec.rb | 84 +++---------- gems/aws-sdk-s3/spec/transfer_manager_spec.rb | 106 +++++++++++++++-- 8 files changed, 247 insertions(+), 286 deletions(-) diff --git a/gems/aws-sdk-s3/spec/file_downloader_spec.rb b/gems/aws-sdk-s3/spec/file_downloader_spec.rb index 619a5a73496..bd782e58bcf 100644 --- a/gems/aws-sdk-s3/spec/file_downloader_spec.rb +++ b/gems/aws-sdk-s3/spec/file_downloader_spec.rb @@ -23,6 +23,9 @@ module S3 describe '#download', :jruby_flaky do let(:path) { Tempfile.new('destination').path } let(:one_meg) { 1024 * 1024 } + let(:single_params) { { bucket: 'bucket', key: 'single' } } + let(:parts_params) { { bucket: 'bucket', key: 'parts' } } + let(:range_params) { { bucket: 'bucket', key: 
'range' } } before(:each) do allow(Dir).to receive(:tmpdir).and_return(tmpdir) @@ -41,9 +44,8 @@ module S3 end it 'downloads a single object using Client#get_object' do - params = { bucket: 'foo', key: 'single' } - expect(client).to receive(:get_object).with(params.merge(response_target: path)).exactly(1).times - subject.download(path, params) + expect(client).to receive(:get_object).with(single_params.merge(response_target: path)).exactly(1).times + subject.download(path, single_params) end it 'downloads a large object in parts' do @@ -52,7 +54,7 @@ module S3 parts += 1 { body: 'body', content_range: 'bytes 0-3/4' } end) - subject.download(path, bucket: 'foo', key: 'parts') + subject.download(path, parts_params) expect(parts).to eq(4) end @@ -65,11 +67,11 @@ module S3 } responses[context.params[:range]] }) - subject.download(path, bucket: 'foo', key: 'range', chunk_size: 5 * one_meg, mode: 'get_range') + subject.download(path, range_params.merge(chunk_size: 5 * one_meg, mode: 'get_range')) end it 'supports download object with version_id' do - params = { bucket: 'foo', key: 'single', version_id: 'foo' } + params = single_params.merge(version_id: 'foo') expect(client).to receive(:get_object).with(params.merge(response_target: path)).exactly(1).times subject.download(path, params) @@ -83,7 +85,7 @@ module S3 mutex.synchronize { callback_data[:called] += 1 } end - subject.download(path, bucket: 'foo', key: 'single', on_checksum_validated: callback) + subject.download(path, single_params.merge(on_checksum_validated: callback)) expect(callback_data[:called]).to eq(1) end @@ -97,7 +99,7 @@ module S3 mutex.synchronize { callback_data[:called] += 1 } end - subject.download(path, bucket: 'foo', key: 'parts', on_checksum_validated: callback) + subject.download(path, parts_params.merge(on_checksum_validated: callback)) expect(callback_data[:called]).to eq(4) end @@ -111,7 +113,7 @@ module S3 { body: 'body' } }) - subject.download(path, bucket: 'foo', key: 'single', checksum_mode: 'DISABLED') + subject.download(path, single_params.merge(checksum_mode: 'DISABLED')) end it 'downloads the file in range chunks' do @@ -121,16 +123,15 @@ module S3 { content_range: "bytes #{ranges[:start]}-#{ranges[:end]}/#{20 * one_meg}" } }) - subject.download(path, bucket: 'foo', key: 'range', chunk_size: one_meg) + subject.download(path, range_params.merge(chunk_size: one_meg)) end context 'multipart progress' do it 'reports progress for single object' do - params = { bucket: 'foo', key: 'single' } small_file_size = 1024 expect(client) .to receive(:get_object) - .with(params.merge(response_target: path, on_chunk_received: instance_of(Proc))) do |args| + .with(single_params.merge(response_target: path, on_chunk_received: instance_of(Proc))) do |args| args[:on_chunk_received].call(Tempfile.new('small-file'), small_file_size, small_file_size) end @@ -142,7 +143,7 @@ module S3 n_calls += 1 end - subject.download(path, params.merge(progress_callback: callback)) + subject.download(path, single_params.merge(progress_callback: callback)) expect(n_calls).to eq(1) end @@ -162,7 +163,7 @@ module S3 n_calls += 1 end end - subject.download(path, bucket: 'foo', key: 'parts', progress_callback: callback) + subject.download(path, parts_params.merge(progress_callback: callback)) expect(n_calls).to eq(4) end end @@ -170,13 +171,13 @@ module S3 context 'error handling' do it 'raises when checksum validation fails on single object' do client.stub_responses(:get_object, { body: 'body', checksum_sha1: 'invalid' }) - expect { 
subject.download(path, bucket: 'foo', key: 'single') }.to raise_error(Aws::Errors::ChecksumError)
+          expect { subject.download(path, single_params) }.to raise_error(Aws::Errors::ChecksumError)
         end
 
         it 'raises when checksum validation fails on multipart object' do
           client.stub_responses(:get_object, { body: 'body', checksum_sha1: 'invalid' })
           expect(Thread).to receive(:new).and_yield.and_return(double(value: nil))
-          expect { subject.download(path, bucket: 'foo', key: 'parts') }.to raise_error(Aws::Errors::ChecksumError)
+          expect { subject.download(path, parts_params) }.to raise_error(Aws::Errors::ChecksumError)
         end
 
         it 'raises when ETAG does not match during multipart get by ranges' do
@@ -186,7 +187,7 @@ module S3
             'PreconditionFailed'
           })
           expect(Thread).to receive(:new).and_yield.and_return(double(value: nil))
-          expect { subject.download(path, bucket: 'foo', key: 'range', chunk_size: one_meg, mode: 'get_range') }
+          expect { subject.download(path, range_params.merge(chunk_size: one_meg, mode: 'get_range')) }
             .to raise_error(Aws::S3::Errors::PreconditionFailed)
         end
 
@@ -198,34 +199,33 @@ module S3
           })
           expect(Thread).to receive(:new).and_yield.and_return(double(value: nil))
-          expect { subject.download(path, bucket: 'foo', key: 'parts') }
-            .to raise_error(Aws::S3::Errors::PreconditionFailed)
+          expect { subject.download(path, parts_params) }.to raise_error(Aws::S3::Errors::PreconditionFailed)
         end
 
         it 'raises when given an invalid mode' do
-          expect { subject.download(path, bucket: 'foo', key: 'parts', mode: 'invalid_mode') }
+          expect { subject.download(path, parts_params.merge(mode: 'invalid_mode')) }
            .to raise_error(ArgumentError, /Invalid mode invalid_mode provided/)
         end
 
         it 'raises when given a "get_range" mode without :chunk_size' do
-          expect { subject.download(path, bucket: 'foo', key: 'range', mode: 'get_range') }
+          expect { subject.download(path, range_params.merge(mode: 'get_range')) }
            .to raise_error(ArgumentError, /In get_range mode, :chunk_size must be provided/)
         end
 
         it 'raises when the given :chunk_size is larger than the file size' do
-          expect { subject.download(path, bucket: 'foo', key: 'parts', chunk_size: 50 * one_meg) }
+          expect { subject.download(path, range_params.merge(chunk_size: 50 * one_meg)) }
            .to raise_error(ArgumentError, /:chunk_size shouldn't exceed total file size/)
         end
 
         it 'raises when :on_checksum_validated is not callable' do
-          expect { subject.download(path, bucket: 'foo', key: 'parts', on_checksum_validated: 'string') }
+          expect { subject.download(path, parts_params.merge(on_checksum_validated: 'string')) }
            .to raise_error(ArgumentError, /:on_checksum_validated must be callable/)
         end
 
         it 'raises when range validation fails' do
           client.stub_responses(:get_object, { body: 'body', content_range: 'bytes 0-3/4' })
           expect(Thread).to receive(:new).and_yield.and_return(double(value: nil))
-          expect { subject.download(path, bucket: 'foo', key: 'parts', mode: 'get_range', chunk_size: one_meg) }
+          expect { subject.download(path, range_params.merge(mode: 'get_range', chunk_size: one_meg)) }
            .to raise_error(Aws::S3::MultipartDownloadError)
         end
 
@@ -242,7 +242,7 @@
          })
           expect(Thread).to receive(:new).and_yield.and_return(double(value: nil))
-          expect { subject.download(path, bucket: 'foo', key: 'range', chunk_size: 5 * one_meg, mode: 'get_range') }
+          expect { subject.download(path, range_params.merge(chunk_size: 5 * one_meg, mode: 'get_range')) }
            .to raise_error(Aws::S3::MultipartDownloadError)
           expect(File.exist?(path)).to be(true)
           expect(File.read(path)).to eq('existing content')
diff 
--git a/gems/aws-sdk-s3/spec/file_uploader_spec.rb b/gems/aws-sdk-s3/spec/file_uploader_spec.rb index ce23750fe77..b9693b1d88b 100644 --- a/gems/aws-sdk-s3/spec/file_uploader_spec.rb +++ b/gems/aws-sdk-s3/spec/file_uploader_spec.rb @@ -9,6 +9,8 @@ module S3 let(:client) { S3::Client.new(stub_responses: true) } let(:subject) { FileUploader.new(client: client) } let(:params) { { bucket: 'bucket', key: 'key' } } + let(:one_meg) { 1024 * 1024 } + let(:one_mb) { '.' * one_meg } describe '#initialize' do it 'constructs a default s3 client when not given' do @@ -24,22 +26,13 @@ module S3 end it 'sets a custom multipart threshold' do - five_mb = 5 * 1024 * 1024 + five_mb = 5 * one_meg uploader = FileUploader.new(client: client, multipart_threshold: five_mb) expect(uploader.multipart_threshold).to be(five_mb) end end describe '#upload' do - let(:one_meg) { 1024 * 1024 } - let(:one_mb) { '.' * one_meg } - let(:one_meg_file) do - Tempfile.new('one-meg-file').tap do |f| - f.write(one_mb) - f.rewind - end - end - let(:ten_meg_file) do Tempfile.new('ten-meg-file').tap do |f| 10.times { f.write(one_mb) } @@ -47,21 +40,24 @@ module S3 end end - let(:one_hundred_seventeen_meg_file) do - Tempfile.new('one-hundred-seventeen-meg-file').tap do |f| - 117.times { f.write(one_mb) } + it 'uploads a small file using Client#put_object' do + file = Tempfile.new('one-meg-file').tap do |f| + f.write(one_mb) f.rewind end - end - it 'uploads a small file using Client#put_object' do - expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: one_meg_file }) - subject.upload(one_meg_file, params) + expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: file }) + subject.upload(file, params) end it 'delegates the large file to use multipart upload' do - expect_any_instance_of(MultipartFileUploader).to receive(:upload).with(one_hundred_seventeen_meg_file, params) - subject.upload(one_hundred_seventeen_meg_file, params) + large_file = Tempfile.new('one-hundred-seventeen-meg-file').tap do |f| + 117.times { f.write(one_mb) } + f.rewind + end + + expect_any_instance_of(MultipartFileUploader).to receive(:upload).with(large_file, params) + subject.upload(large_file, params) end it 'reports progress for a small file' do @@ -74,6 +70,7 @@ module S3 expect(bytes).to eq([ten_meg_file.size]) expect(totals).to eq([ten_meg_file.size]) end + subject.upload(ten_meg_file, params.merge(progress_callback: callback)) end @@ -81,15 +78,16 @@ module S3 file = double('file') expect(File).to receive(:open).with(ten_meg_file.path, 'rb').and_yield(file) expect(client).to receive(:put_object).with(params.merge(body: file)) + subject.upload(ten_meg_file.path, params) end it 'does not fail when given :thread_count' do expect(client).to receive(:put_object).with(params.merge(body: ten_meg_file)) + subject.upload(ten_meg_file, params.merge(thread_count: 1)) end end end end end - diff --git a/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb b/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb index a966359f190..6369f61ba99 100644 --- a/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb +++ b/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb @@ -31,21 +31,7 @@ module S3 describe '#upload' do let(:one_mb) { '.' 
* 1024 * 1024 } - let(:one_meg_file) do - Tempfile.new('one-meg-file').tap do |f| - f.write(one_mb) - f.rewind - end - end - - let(:ten_meg_file) do - Tempfile.new('ten-meg-file').tap do |f| - 10.times { f.write(one_mb) } - f.rewind - end - end - - let(:one_hundred_seventeen_meg_file) do + let(:large_file) do Tempfile.new('one-hundred-seventeen-meg-file').tap do |f| 117.times { f.write(one_mb) } f.rewind @@ -56,40 +42,40 @@ module S3 client.stub_responses(:create_multipart_upload, upload_id: 'id') client.stub_responses(:upload_part, etag: 'etag', checksum_crc32: 'checksum') expect(client).to receive(:complete_multipart_upload).with( - bucket: 'bucket', - key: 'key', - upload_id: 'id', - multipart_upload: { - parts: [ - { checksum_crc32: 'checksum', etag: 'etag', part_number: 1 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 2 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 3 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 4 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 5 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 6 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 7 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 8 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 9 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 10 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 11 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 12 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 13 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 14 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 15 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 16 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 17 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 18 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 19 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 20 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 21 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 22 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 23 }, - { checksum_crc32: 'checksum', etag: 'etag', part_number: 24 } - ] - }, - mpu_object_size: one_hundred_seventeen_meg_file.size + params.merge( + upload_id: 'id', + multipart_upload: { + parts: [ + { checksum_crc32: 'checksum', etag: 'etag', part_number: 1 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 2 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 3 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 4 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 5 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 6 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 7 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 8 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 9 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 10 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 11 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 12 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 13 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 14 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 15 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 16 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 17 }, + { checksum_crc32: 'checksum', etag: 'etag', 
part_number: 18 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 19 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 20 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 21 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 22 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 23 }, + { checksum_crc32: 'checksum', etag: 'etag', part_number: 24 } + ] + }, + mpu_object_size: large_file.size + ) ) - subject.upload(one_hundred_seventeen_meg_file, params.merge(content_type: 'text/plain')) + subject.upload(large_file, params.merge(content_type: 'text/plain')) end it 'allows for full object checksums' do @@ -103,13 +89,8 @@ module S3 expect(client).to receive(:complete_multipart_upload) .with(hash_including(checksum_type: 'FULL_OBJECT', checksum_crc32: 'checksum')) .and_call_original - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag', checksum_crc32: 'part') - subject.upload( - one_hundred_seventeen_meg_file, - params.merge(content_type: 'text/plain', checksum_crc32: 'checksum') - ) + subject.upload(large_file, params.merge(content_type: 'text/plain', checksum_crc32: 'checksum')) end it 'reports progress for multipart uploads' do @@ -125,14 +106,16 @@ module S3 expect(totals.size).to eq(24) end - subject.upload( - one_hundred_seventeen_meg_file, - params.merge(content_type: 'text/plain', progress_callback: callback) - ) + subject.upload(large_file, params.merge(content_type: 'text/plain', progress_callback: callback)) end it 'raises when given a file smaller than 5MB' do - expect { subject.upload(one_meg_file, params) } + file = Tempfile.new('one-meg-file').tap do |f| + f.write(one_mb) + f.rewind + end + + expect { subject.upload(file, params) } .to raise_error(ArgumentError, /unable to multipart upload files smaller than 5MB/) end @@ -149,9 +132,7 @@ module S3 ) expect(client).to receive(:abort_multipart_upload).with(params.merge(upload_id: 'MultipartUploadId')) - expect do - subject.upload(one_hundred_seventeen_meg_file, params) - end.to raise_error(/multipart upload failed: part 3 failed/) + expect { subject.upload(large_file, params) }.to raise_error(/multipart upload failed: part 3 failed/) end it 'reports when it is unable to abort a failed multipart upload' do @@ -170,17 +151,16 @@ module S3 ) client.stub_responses(:abort_multipart_upload, [RuntimeError.new('network-error')]) expect do - subject.upload(one_hundred_seventeen_meg_file, params) + subject.upload(large_file, params) end.to raise_error(/failed to abort multipart upload: network-error. 
Multipart upload failed: part failed/) end it 'aborts multipart upload when upload fails to complete' do client.stub_responses(:complete_multipart_upload, RuntimeError.new('network-error')) + expect(client).to receive(:abort_multipart_upload).with(params.merge(upload_id: 'MultipartUploadId')) - expect do - subject.upload(one_hundred_seventeen_meg_file, params) - end.to raise_error(Aws::S3::MultipartUploadError) + expect { subject.upload(large_file, params) }.to raise_error(Aws::S3::MultipartUploadError) end end end diff --git a/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb b/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb index f10a99420eb..fd74ab904e8 100644 --- a/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb +++ b/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb @@ -9,6 +9,8 @@ module S3 let(:client) { S3::Client.new(stub_responses: true) } let(:subject) { MultipartStreamUploader.new(client: client) } let(:params) { { bucket: 'bucket', key: 'key' } } + let(:one_mb) { '.' * 1024 * 1024 } + let(:seventeen_mb) { one_mb * 17 } describe '#initialize' do it 'constructs a default s3 client when none provided' do @@ -26,7 +28,7 @@ module S3 end it 'sets provided configurations' do - ten_mb = 10 * 1024 * 1024 + ten_mb = one_mb * 10 subject = MultipartStreamUploader.new(client: client, tempfile: true, part_size: ten_mb, thread_count: 1) expect(subject.client).to be(client) @@ -37,11 +39,6 @@ module S3 end describe '#upload_stream', :jruby_flaky do - let(:params) { { bucket: 'bucket', key: 'key' } } - let(:one_mb) { '.' * 1024 * 1024 } - let(:ten_mb) { one_mb * 10 } - let(:seventeen_mb) { one_mb * 17 } - it 'can upload empty stream' do client.stub_responses(:create_multipart_upload, upload_id: 'id') client.stub_responses(:upload_part, etag: 'etag') @@ -75,7 +72,6 @@ module S3 it 'uploads the correct parts' do client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag') 4.times.each do |p| expect(client) .to receive(:upload_part) @@ -89,7 +85,6 @@ module S3 it 'uploads the correct parts when input is chunked' do client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag') client.stub_responses(:complete_multipart_upload) 4.times.each do |p| expect(client) @@ -106,7 +101,6 @@ module S3 it 'passes stringios with correct contents to upload_part' do client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag') client.stub_responses(:complete_multipart_upload) result = [] mutex = Mutex.new @@ -169,10 +163,8 @@ module S3 RuntimeError.new('part failed') ] ) - client.stub_responses( - :abort_multipart_upload, - [RuntimeError.new('network-error')] - ) + client.stub_responses(:abort_multipart_upload, RuntimeError.new('network-error')) + expect do subject.upload(params) { |write_stream| write_stream << seventeen_mb } end.to raise_error(S3::MultipartUploadError, /failed to abort multipart upload: network-error/) @@ -201,7 +193,6 @@ module S3 it 'uploads the correct parts' do client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag') 4.times.each do |p| expect(client) .to receive(:upload_part) @@ -270,10 +261,8 @@ module S3 RuntimeError.new('part failed') ] ) - client.stub_responses( - :abort_multipart_upload, - [RuntimeError.new('network-error')] - ) + client.stub_responses(:abort_multipart_upload, RuntimeError.new('network-error')) + expect do 
subject.upload(params) { |write_stream| write_stream << seventeen_mb } end.to raise_error(S3::MultipartUploadError, /failed to abort multipart upload: network-error/) diff --git a/gems/aws-sdk-s3/spec/object/download_file_spec.rb b/gems/aws-sdk-s3/spec/object/download_file_spec.rb index 1e7fa6f2bfb..dda08d89fc0 100644 --- a/gems/aws-sdk-s3/spec/object/download_file_spec.rb +++ b/gems/aws-sdk-s3/spec/object/download_file_spec.rb @@ -7,51 +7,34 @@ module Aws module S3 describe Object do let(:client) { S3::Client.new(stub_responses: true) } - let(:tmpdir) { Dir.tmpdir } + let(:subject) { S3::Object.new(bucket_name: 'bucket', key: 'key', client: client) } describe '#download_file', :jruby_flaky do let(:path) { Tempfile.new('destination').path } - let(:one_meg) { 1024 * 1024 } + let(:one_mb_size) { 1024 * 1024 } - before(:each) do - allow(Dir).to receive(:tmpdir).and_return(tmpdir) + before do + client.stub_responses(:head_object, content_length: one_mb_size, parts_count: nil) + client.stub_responses(:get_object, { body: 'hello-world' }) end - it 'downloads a single file' do - client.stub_responses(:head_object, { content_length: one_meg, parts_count: nil }) - expected_params = { bucket: 'bucket', key: 'small', response_target: path } - expect(client).to receive(:get_object).with(expected_params).exactly(1).times - - object = S3::Object.new(bucket_name: 'bucket', key: 'small', client: client) - object.download_file(path) + it 'returns true when download succeeds' do + expect(subject.download_file(path)).to be(true) + expect(File.read(path)).to eq('hello-world') end - it 'downloads a large file in parts' do - parts = 0 - client.stub_responses(:head_object, { content_length: 20 * one_meg, parts_count: 4 }) - client.stub_responses(:get_object, lambda { |_ctx| - parts += 1 - { body: 'body', content_range: 'bytes 0-3/4' } - }) - - object = S3::Object.new(bucket_name: 'bucket', key: 'large', client: client) - object.download_file(path) - expect(parts).to eq(4) + it 'raises when download errors' do + client.stub_responses(:head_object, 'NoSuchKey') + expect { subject.download_file(path) }.to raise_error(Aws::S3::Errors::NoSuchKey) end - it 'downloads a large file in ranges' do - client.stub_responses(:head_object, { content_length: 15 * one_meg, parts_count: nil }) - client.stub_responses(:get_object, lambda { |context| - responses = { - 'bytes=0-5242879' => { body: 'body', content_range: 'bytes 0-5242879/15728640' }, - 'bytes=5242880-10485759' => { body: 'body', content_range: 'bytes 5242880-10485759/15728640' }, - 'bytes=10485760-15728639' => { body: 'body', content_range: 'bytes 10485760-15728639/15728640' } - } - responses[context.params[:range]] - }) - - object = S3::Object.new(bucket_name: 'bucket', key: 'single', client: client) - object.download_file(path, chunk_size: 5 * one_meg, mode: 'get_range') + it 'calls progress callback when given' do + n_calls = 0 + callback = proc { |_b, _p, _t| n_calls += 1 } + expect(client).to receive(:get_object) { |args| args[:on_chunk_received]&.call('chunk', 1024, 1024) } + + subject.download_file(path, progress_callback: callback) + expect(n_calls).to eq(1) end end end diff --git a/gems/aws-sdk-s3/spec/object/upload_file_spec.rb b/gems/aws-sdk-s3/spec/object/upload_file_spec.rb index 906241b5ed3..b16df1f3f0f 100644 --- a/gems/aws-sdk-s3/spec/object/upload_file_spec.rb +++ b/gems/aws-sdk-s3/spec/object/upload_file_spec.rb @@ -7,73 +7,54 @@ module Aws module S3 describe Object do let(:client) { S3::Client.new(stub_responses: true) } + let(:subject) { 
S3::Object.new(bucket_name: 'bucket', key: 'key', client: client) } describe '#upload_file' do - let(:expected_params) { { bucket: 'bucket', key: 'key' } } - let(:one_meg) { 1024 * 1024 } - let(:object) { S3::Object.new(bucket_name: 'bucket', key: 'key', client: client) } - let(:one_mb) { '.' * 1024 * 1024 } + let(:mb_size) { 1024 * 1024 } + let(:mb_content) { '.' * mb_size } - let(:one_meg_file) do - Tempfile.new('one-meg-file').tap do |f| - f.write(one_mb) - f.rewind - end - end - - let(:ten_meg_file) do + let(:file) do Tempfile.new('ten-meg-file').tap do |f| - 10.times { f.write(one_mb) } + 10.times { f.write(mb_content) } f.rewind end end - let(:one_hundred_seventeen_meg_file) do + let(:large_file) do Tempfile.new('one-hundred-seventeen-meg-file').tap do |f| - 117.times { f.write(one_mb) } + 117.times { f.write(mb_content) } f.rewind end end - it 'uploads objects with custom options without mutating them' do - options = {}.freeze - expect(client).to receive(:put_object).with(expected_params.merge(body: one_meg_file)) - object.upload_file(one_meg_file, options) + it 'returns true when upload succeeds' do + expect(subject.upload_file(file)).to be(true) + end + + it 'raises when upload errors' do + client.stub_responses(:put_object, 'AccessDenied') + expect { subject.upload_file(file) }.to raise_error(Aws::S3::Errors::AccessDenied) end it 'yields the response to the given block' do - object.upload_file(ten_meg_file) do |response| + subject.upload_file(file) do |response| expect(response).to be_kind_of(Seahorse::Client::Response) expect(response.etag).to eq('ETag') end end - it 'uploads a small object' do - expect(client).to receive(:put_object).with(expected_params.merge(body: ten_meg_file)) - object.upload_file(ten_meg_file) - end + it 'calls progress callback when given' do + n_calls = 0 + callback = proc { |_b, _t| n_calls += 1 } + expect(client).to receive(:put_object) { |args| args[:on_chunk_sent]&.call('chunk', 1024, 1024) } - it 'uploads a large object' do - expect(client).to receive(:complete_multipart_upload).with( - expected_params.merge( - upload_id: 'id', - multipart_upload: { - parts: [ - { checksum_crc32: 'part', etag: 'etag', part_number: 1 }, - { checksum_crc32: 'part', etag: 'etag', part_number: 2 } - ] - }, - mpu_object_size: ten_meg_file.size - ) - ) - client.stub_responses(:create_multipart_upload, upload_id: 'id') - client.stub_responses(:upload_part, etag: 'etag', checksum_crc32: 'part') - object.upload_file(ten_meg_file, multipart_threshold: 5 * one_meg) + subject.upload_file(file, progress_callback: callback) + expect(n_calls).to eq(1) end it 'accepts an alternative multipart file threshold' do - expect(client).to receive(:put_object).with(expected_params.merge(body: one_hundred_seventeen_meg_file)) - object.upload_file(one_hundred_seventeen_meg_file, multipart_threshold: 200 * one_meg) + expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: large_file }) + subject.upload_file(large_file, multipart_threshold: 200 * mb_size) end end end diff --git a/gems/aws-sdk-s3/spec/object/upload_stream_spec.rb b/gems/aws-sdk-s3/spec/object/upload_stream_spec.rb index 3597d497b70..269b91746eb 100644 --- a/gems/aws-sdk-s3/spec/object/upload_stream_spec.rb +++ b/gems/aws-sdk-s3/spec/object/upload_stream_spec.rb @@ -7,30 +7,22 @@ module Aws module S3 describe Object do let(:client) { S3::Client.new(stub_responses: true) } + let(:subject) { S3::Object.new(bucket_name: 'bucket', key: 'key', client: client) } describe '#upload_stream', :jruby_flaky do - 
let(:object) { S3::Object.new(bucket_name: 'bucket', key: 'key', client: client) }
         let(:params) { { bucket: 'bucket', key: 'key' } }
-        let(:one_mb) { '.' * 1024 * 1024 }
-        let(:seventeen_mb) { one_mb * 17 }
+        let(:seventeen_mb) { '.' * 1024 * 1024 * 17 }
 
-        it 'uploads stream' do
-          client.stub_responses(:create_multipart_upload, upload_id: 'id')
-          client.stub_responses(:upload_part, etag: 'etag')
-          expect(client).to receive(:complete_multipart_upload).with(
-            bucket: 'bucket',
-            key: 'key',
-            upload_id: 'id',
-            multipart_upload: {
-              parts: [
-                { etag: 'etag', part_number: 1 },
-                { etag: 'etag', part_number: 2 },
-                { etag: 'etag', part_number: 3 },
-                { etag: 'etag', part_number: 4 }
-              ]
-            }
-          ).once
-          object.upload_stream(content_type: 'text/plain') { |write_stream| write_stream << seventeen_mb }
+        it 'returns true when the upload succeeds' do
+          resp = subject.upload_stream(content_type: 'text/plain') { |write_stream| write_stream << seventeen_mb }
+          expect(resp).to be(true)
+        end
+
+        it 'raises when the upload fails' do
+          client.stub_responses(:upload_part, RuntimeError.new('part failed'))
+          expect do
+            subject.upload_stream { |write_stream| write_stream << seventeen_mb }
+          end.to raise_error(Aws::S3::MultipartUploadError, /part failed/)
         end
 
         it 'respects the thread_count option' do
@@ -38,63 +30,19 @@ module S3
           expect(Thread).to receive(:new).exactly(custom_thread_count).times.and_return(double(value: nil))
           client.stub_responses(:create_multipart_upload, upload_id: 'id')
           client.stub_responses(:complete_multipart_upload)
-          object.upload_stream(thread_count: custom_thread_count) { |_write_stream| }
+          subject.upload_stream(thread_count: custom_thread_count) { |_write_stream| }
         end
 
         it 'respects the tempfile option' do
           client.stub_responses(:create_multipart_upload, upload_id: 'id')
-          client.stub_responses(:upload_part, etag: 'etag')
-          expect(client).to receive(:complete_multipart_upload).with(
-            params.merge(
-              upload_id: 'id',
-              multipart_upload: {
-                parts: [
-                  { etag: 'etag', part_number: 1 },
-                  { etag: 'etag', part_number: 2 },
-                  { etag: 'etag', part_number: 3 },
-                  { etag: 'etag', part_number: 4 }
-                ]
-              }
-            )
-          ).once
-          object.upload_stream(tempfile: true) { |write_stream| write_stream << seventeen_mb }
-        end
-
-        it 'uploads correct parts when chunked with custom part_size' do
-          client.stub_responses(:create_multipart_upload, upload_id: 'id')
-          client.stub_responses(:complete_multipart_upload)
-          3.times.each do |p|
+          4.times.each do |p|
            expect(client)
              .to receive(:upload_part)
-              .with(params.merge(upload_id: 'id', body: instance_of(StringIO), part_number: p + 1))
+              .with(params.merge(upload_id: 'id', body: instance_of(Tempfile), part_number: p + 1))
              .once
              .and_return(double(:upload_part, etag: 'etag'))
           end
-
-          object.upload_stream(part_size: 7 * 1024 * 1024) do |write_stream|
-            17.times { write_stream << one_mb }
-          end
-        end
-
-        it 'passes stringios with correct contents with custom part_size' do
-          client.stub_responses(:create_multipart_upload, upload_id: 'id')
-          client.stub_responses(:complete_multipart_upload)
-          result = []
-          mutex = Mutex.new
-          allow(client).to receive(:upload_part) do |part|
-            mutex.synchronize { result << [part[:part_number], part[:body].read.size] }
-          end.and_return(double(:upload_part, etag: 'etag'))
-
-          object.upload_stream(part_size: 7 * 1024 * 1024) do |write_stream|
-            17.times { write_stream << one_mb }
-          end
-          expect(result.sort_by(&:first)).to eq(
-            [
-              [1, 7 * 1024 * 1024],
-              [2, 7 * 1024 * 1024],
-              [3, 3 * 1024 * 1024]
-            ]
-          )
+          subject.upload_stream(tempfile: true) { 
|write_stream| write_stream << seventeen_mb } end end end diff --git a/gems/aws-sdk-s3/spec/transfer_manager_spec.rb b/gems/aws-sdk-s3/spec/transfer_manager_spec.rb index 5d4d084f2a3..a752b35f0f6 100644 --- a/gems/aws-sdk-s3/spec/transfer_manager_spec.rb +++ b/gems/aws-sdk-s3/spec/transfer_manager_spec.rb @@ -1,37 +1,119 @@ # frozen_string_literal: true require_relative 'spec_helper' +require 'tempfile' module Aws module S3 describe TransferManager do + let(:client) { S3::Client.new(stub_responses: true) } + let(:subject) { TransferManager.new(client: client) } + let(:one_mb_size) { 1024 * 1024 } + let(:one_mb_content) { '.' * one_mb_size } + describe '#initialize' do - it 'constructs a default s3 client when one is not given' do + it 'constructs a default s3 client when not given' do + client = double('client') + expect(S3::Client).to receive(:new).and_return(client) + + tm = TransferManager.new + expect(tm.client).to be(client) end end - describe '#download_file' do - it 'downloads single file' do - # TODO + describe '#download_file', :jruby_flaky do + let(:path) { Tempfile.new('destination').path } + + before do + client.stub_responses(:head_object, content_length: one_mb_size, parts_count: nil) + client.stub_responses(:get_object, { body: 'hello-world' }) end - it 'downloads larger files in parts' do - # TODO + it 'returns true when download succeeds' do + expect(subject.download_file(path, bucket: 'bucket', key: 'key')).to be(true) + expect(File.read(path)).to eq('hello-world') end - it 'downloads larger files in ranges' do - # TODO + it 'raises when download errors' do + client.stub_responses(:head_object, 'NoSuchKey') + expect { subject.download_file(path, bucket: 'bucket', key: 'missing-key') } + .to raise_error(Aws::S3::Errors::NoSuchKey) + end + + it 'calls progress callback when given' do + n_calls = 0 + callback = proc { |_b, _p, _t| n_calls += 1 } + expect(client).to receive(:get_object) { |args| args[:on_chunk_received]&.call('chunk', 1024, 1024) } + + subject.download_file(path, bucket: 'bucket', key: 'key', progress_callback: callback) + expect(n_calls).to eq(1) end end describe '#upload_file' do - # TODO + let(:file) do + Tempfile.new('ten-meg-file').tap do |f| + 10.times { f.write(one_mb_content) } + f.rewind + end + end + + let(:large_file) do + Tempfile.new('one-hundred-seventeen-meg-file').tap do |f| + 117.times { f.write(one_mb_content) } + f.rewind + end + end + + it 'returns true when upload succeeds' do + expect(subject.upload_file(file, bucket: 'bucket', key: 'key')).to be(true) + end + + it 'raises when upload errors' do + client.stub_responses(:put_object, 'AccessDenied') + expect { subject.upload_file(file, bucket: 'forbidden-bucket', key: 'key') } + .to raise_error(Aws::S3::Errors::AccessDenied) + end + + it 'yields response when block given' do + subject.upload_file(file, bucket: 'bucket', key: 'key') do |response| + expect(response).to be_kind_of(Seahorse::Client::Response) + expect(response.etag).to eq('ETag') + end + end + + it 'calls progress callback when given' do + n_calls = 0 + callback = proc { |_b, _t| n_calls += 1 } + expect(client).to receive(:put_object) { |args| args[:on_chunk_sent]&.call('chunk', 1024, 1024) } + + subject.upload_file(file, bucket: 'bucket', key: 'key', progress_callback: callback) + expect(n_calls).to eq(1) + end + + it 'accepts an alternative multipart file threshold' do + expect(client).to receive(:put_object).with({ bucket: 'bucket', key: 'key', body: large_file }) + subject.upload_file(large_file, bucket: 'bucket', key: 
'key', multipart_threshold: 200 * one_mb_size)
+      end
     end
 
-    describe '#upload_stream' do
-      # TODO
+    describe '#upload_stream', :jruby_flaky do
+      let(:seventeen_mb) { one_mb_content * 17 }
+
+      it 'returns true when the upload succeeds' do
+        resp = subject.upload_stream(bucket: 'bucket', key: 'key', content_type: 'text/plain') do |write_stream|
+          write_stream << seventeen_mb
+        end
+        expect(resp).to be(true)
+      end
+
+      it 'raises when the upload fails' do
+        client.stub_responses(:upload_part, RuntimeError.new('part failed'))
+        expect do
+          subject.upload_stream(bucket: 'bucket', key: 'key') { |write_stream| write_stream << seventeen_mb }
+        end.to raise_error(Aws::S3::MultipartUploadError, /part failed/)
+      end
     end
   end
 end
-
From e48f418bcda0eb92069e50ce7e6f749f8c76ab4f Mon Sep 17 00:00:00 2001
From: Juli Tera
Date: Thu, 14 Aug 2025 10:38:04 -0700
Subject: [PATCH 17/27] Minor cleanups

---
 .../lib/aws-sdk-s3/file_uploader.rb           |  9 ++++---
 .../lib/aws-sdk-s3/multipart_file_uploader.rb |  9 ++++++-
 .../aws-sdk-s3/multipart_stream_uploader.rb   |  5 ++--
 .../lib/aws-sdk-s3/transfer_manager.rb        | 24 ++++++++++---------
 gems/aws-sdk-s3/spec/file_uploader_spec.rb    |  2 +-
 5 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb
index 7937de9bc66..0833d9086f1 100644
--- a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb
+++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb
@@ -7,7 +7,8 @@ module S3
 
   # @api private
   class FileUploader
 
-    ONE_HUNDRED_MEGABYTES = 100 * 1024 * 1024
+    # @api private
+    DEFAULT_MULTIPART_THRESHOLD = 100 * 1024 * 1024
 
     # @param [Hash] options
     # @option options [Client] :client
@@ -15,15 +16,13 @@ def initialize(options = {})
       @options = options
       @client = options[:client] || Client.new
-      @multipart_threshold = options[:multipart_threshold] ||
-        ONE_HUNDRED_MEGABYTES
+      @multipart_threshold = options[:multipart_threshold] || DEFAULT_MULTIPART_THRESHOLD
     end
 
     # @return [Client]
     attr_reader :client
 
-    # @return [Integer] Files larger than or equal to this in bytes are uploaded
-    # using a {MultipartFileUploader}.
+    # @return [Integer] Files larger than or equal to this in bytes are uploaded using a {MultipartFileUploader}.
     attr_reader :multipart_threshold
 
     # @param [String, Pathname, File, Tempfile] source The file to upload. 
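The rename above does not change behavior: `FileUploader#upload` still compares
`File.size(source)` against the threshold and picks either a single `put_object`
call or the multipart path. A minimal sketch of that dispatch, assuming a stubbed
client and a throwaway Tempfile (illustration only, not part of this patch):

    require 'aws-sdk-s3'
    require 'tempfile'

    client = Aws::S3::Client.new(stub_responses: true)
    uploader = Aws::S3::FileUploader.new(client: client)

    file = Tempfile.new('example').tap do |f|
      f.write('.' * 1024) # 1KB, well under the 100MB DEFAULT_MULTIPART_THRESHOLD
      f.rewind
    end

    # Files smaller than uploader.multipart_threshold result in a single
    # Client#put_object call; files at or above it are delegated to
    # MultipartFileUploader, which uploads parts in parallel.
    uploader.upload(file, bucket: 'bucket', key: 'key')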
diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb index 27844c286f5..bfb84f53a43 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb @@ -8,18 +8,25 @@ module S3 # @api private class MultipartFileUploader + # @api private MIN_PART_SIZE = 5 * 1024 * 1024 # 5MB + # @api private MAX_PARTS = 10_000 + # @api private DEFAULT_THREAD_COUNT = 10 + # @api private CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names) + # @api private COMPLETE_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names) + # @api private UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names) + # @api private CHECKSUM_KEYS = Set.new( Client.api.operation(:upload_part).input.shape.members.map do |n, s| n if s.location == 'header' && s.location_name.start_with?('x-amz-checksum-') @@ -27,7 +34,7 @@ class MultipartFileUploader ) # @option options [Client] :client - # @option options [Integer] :thread_count (THREAD_COUNT) + # @option options [Integer] :thread_count (DEFAULT_THREAD_COUNT) def initialize(options = {}) @client = options[:client] || Client.new @thread_count = options[:thread_count] || DEFAULT_THREAD_COUNT diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb index 6f45d8fa36c..1a31f76a599 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb @@ -9,9 +9,10 @@ module Aws module S3 # @api private class MultipartStreamUploader - # api private + # @api private DEFAULT_PART_SIZE = 5 * 1024 * 1024 # 5MB + # @api private DEFAULT_THREAD_COUNT = 10 # @api private @@ -36,7 +37,7 @@ def initialize(options = {}) # @option options [required,String] :bucket # @option options [required,String] :key - # @option options [Integer] :thread_count (10) + # @option options [Integer] :thread_count (DEFAULT_THREAD_COUNT) # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse def upload(options = {}, &block) Aws::Plugins::UserAgent.metric('S3_TRANSFER') do diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb index 40f52ea1084..ee331a5a0f4 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb @@ -6,9 +6,11 @@ module S3 # capabilities with automatic multipart handling, progress tracking, and # handling of large files. 
The following features are supported:
   #
-  # * upload a S3 object with multipart upload
+  # * upload a file with multipart upload
+  # * upload a stream with multipart upload
   # * download an S3 object with multipart download
   # * track transfer progress using a progress listener
+  #
   class TransferManager
     # @param [Hash] options
     # @option options [S3::Client] :client (S3::Client.new)
@@ -25,12 +27,12 @@ def initialize(options = {})
     #
     #   # small files (< 5MB) are downloaded in a single API call
     #   tm = TransferManager.new
-    #   tm.download_file('/path/to/file', bucket: 'bucket-name', key: 'key-name')
+    #   tm.download_file('/path/to/file', bucket: 'bucket', key: 'key')
     #
     # Files larger than 5MB are downloaded using the multipart method:
     #
     #   # large files are split into parts and the parts are downloaded in parallel
-    #   tm.download_file('/path/to/large_file', bucket: 'bucket-name', key: 'key-name')
+    #   tm.download_file('/path/to/large_file', bucket: 'bucket', key: 'key')
     #
     # You can provide a callback to monitor progress of the download:
     #
     #   progress = proc do |bytes, part_sizes, file_size|
     #     bytes.map.with_index do |b, i|
     #       puts "Part #{i + 1}: #{b} / #{part_sizes[i]} " + "Total: #{100.0 * bytes.sum / file_size}%"
     #     end
     #   end
-    #   tm.download_file('/path/to/file', bucket: 'bucket-name', key: 'key-name', progress_callback: progress)
+    #   tm.download_file('/path/to/file', bucket: 'bucket', key: 'key', progress_callback: progress)
     #
     # @param [String] destination
     #   Where to download the file to.
@@ -102,17 +104,17 @@ def download_file(destination, bucket:, key:, **options)
     #
     #   # a small file is uploaded with the PutObject API
     #   tm = TransferManager.new
-    #   tm.upload_file('/path/to/small_file', bucket: 'bucket-name', key: 'key-name')
+    #   tm.upload_file('/path/to/small_file', bucket: 'bucket', key: 'key')
     #
     # Files larger than or equal to `:multipart_threshold` are uploaded using multipart upload APIs.
     #
     #   # large files are automatically split into parts and the parts are uploaded in parallel
-    #   tm.upload_file('/path/to/large_file', bucket: 'bucket-name', key: 'key-name')
+    #   tm.upload_file('/path/to/large_file', bucket: 'bucket', key: 'key')
     #
     # The response of the S3 upload API is yielded if a block is given.
     #
     #   # API response will have the etag value of the file
-    #   tm.upload_file('/path/to/file', bucket: 'bucket-name', key: 'key-name') do |response|
+    #   tm.upload_file('/path/to/file', bucket: 'bucket', key: 'key') do |response|
     #     etag = response.etag
     #   end
     #
     # You can provide a callback to monitor progress of the upload:
     #
     #   progress = proc do |bytes, totals|
     #     bytes.map.with_index do |b, i|
     #       puts "Part #{i + 1}: #{b} / #{totals[i]} " + "Total: #{100.0 * bytes.sum / totals.sum}%"
     #     end
     #   end
-    #   tm.upload_file('/path/to/file', bucket: 'bucket-name', key: 'key-name', progress_callback: progress)
+    #   tm.upload_file('/path/to/file', bucket: 'bucket', key: 'key', progress_callback: progress)
     #
     # @param [String, Pathname, File, Tempfile] source
     #   A file on the local file system that will be uploaded. 
This can either be a `String` or `Pathname` to the @@ -186,15 +188,15 @@ def upload_file(source, bucket:, key:, **options) # # @example Streaming chunks of data # tm = TransferManager.new - # tm.upload_stream(bucket: 'example-bucket', key: 'example-key') do |write_stream| + # tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream| # 10.times { write_stream << 'foo' } # end # @example Streaming chunks of data - # tm.upload_stream(bucket: 'example-bucket', key: 'example-key') do |write_stream| + # tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream| # IO.copy_stream(IO.popen('ls'), write_stream) # end # @example Streaming chunks of data - # tm.upload_stream(bucket: 'example-bucket', key: 'example-key') do |write_stream| + # tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream| # IO.copy_stream(STDIN, write_stream) # end # diff --git a/gems/aws-sdk-s3/spec/file_uploader_spec.rb b/gems/aws-sdk-s3/spec/file_uploader_spec.rb index b9693b1d88b..64dbaf7cdd2 100644 --- a/gems/aws-sdk-s3/spec/file_uploader_spec.rb +++ b/gems/aws-sdk-s3/spec/file_uploader_spec.rb @@ -22,7 +22,7 @@ module S3 end it 'sets a default multipart threshold when not given' do - expect(subject.multipart_threshold).to be(FileUploader::ONE_HUNDRED_MEGABYTES) + expect(subject.multipart_threshold).to be(FileUploader::DEFAULT_MULTIPART_THRESHOLD) end it 'sets a custom multipart threshold' do From 4784fd94b132cc47a6e61fd6e911c14e274fb489 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Thu, 14 Aug 2025 10:52:38 -0700 Subject: [PATCH 18/27] Update user agent tracking --- .../lib/aws-sdk-s3/customizations/object.rb | 5 +--- .../lib/aws-sdk-s3/file_downloader.rb | 23 +++++++------- .../lib/aws-sdk-s3/file_uploader.rb | 14 ++++----- .../aws-sdk-s3/multipart_stream_uploader.rb | 8 ++--- .../lib/aws-sdk-s3/object_copier.rb | 12 ++++---- .../lib/aws-sdk-s3/transfer_manager.rb | 30 +++++++++---------- 6 files changed, 42 insertions(+), 50 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb index 4e7d4691723..213cc78e87a 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb @@ -400,10 +400,7 @@ def upload_stream(options = {}, &block) part_size: uploading_options.delete(:part_size) ) Aws::Plugins::UserAgent.metric('RESOURCE_MODEL') do - uploader.upload( - uploading_options.merge(bucket: bucket_name, key: key), - &block - ) + uploader.upload(uploading_options.merge(bucket: bucket_name, key: key), &block) end true end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb index 536bbfa899b..c0492410d13 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb @@ -9,7 +9,10 @@ module S3 # @api private class FileDownloader + # @api private MIN_CHUNK_SIZE = 5 * 1024 * 1024 + + # @api private MAX_PARTS = 10_000 def initialize(options = {}) @@ -29,18 +32,16 @@ def download(destination, options = {}) @params = options validate! 
- Aws::Plugins::UserAgent.metric('S3_TRANSFER') do - case @mode - when 'auto' then multipart_download - when 'single_request' then single_request - when 'get_range' - raise ArgumentError, 'In get_range mode, :chunk_size must be provided' unless @chunk_size + case @mode + when 'auto' then multipart_download + when 'single_request' then single_request + when 'get_range' + raise ArgumentError, 'In get_range mode, :chunk_size must be provided' unless @chunk_size - resp = @client.head_object(@params) - multithreaded_get_by_ranges(resp.content_length, resp.etag) - else - raise ArgumentError, "Invalid mode #{@mode} provided, :mode should be single_request, get_range or auto" - end + resp = @client.head_object(@params) + multithreaded_get_by_ranges(resp.content_length, resp.etag) + else + raise ArgumentError, "Invalid mode #{@mode} provided, :mode should be single_request, get_range or auto" end File.rename(@temp_path, @path) if @temp_path ensure diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb index 0833d9086f1..f50b02bc525 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb @@ -36,14 +36,12 @@ def initialize(options = {}) # objects smaller than the multipart threshold. # @return [void] def upload(source, options = {}) - Aws::Plugins::UserAgent.metric('S3_TRANSFER') do - if File.size(source) >= multipart_threshold - MultipartFileUploader.new(@options).upload(source, options) - else - # remove multipart parameters not supported by put_object - options.delete(:thread_count) - put_object(source, options) - end + if File.size(source) >= @multipart_threshold + MultipartFileUploader.new(@options).upload(source, options) + else + # remove multipart parameters not supported by put_object + options.delete(:thread_count) + put_object(source, options) end end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb index 1a31f76a599..d3486db2338 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb @@ -40,11 +40,9 @@ def initialize(options = {}) # @option options [Integer] :thread_count (DEFAULT_THREAD_COUNT) # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse def upload(options = {}, &block) - Aws::Plugins::UserAgent.metric('S3_TRANSFER') do - upload_id = initiate_upload(options) - parts = upload_parts(upload_id, options, &block) - complete_upload(upload_id, parts, options) - end + upload_id = initiate_upload(options) + parts = upload_parts(upload_id, options, &block) + complete_upload(upload_id, parts, options) end private diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/object_copier.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/object_copier.rb index f81b5c4192d..ba3ddb302ca 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/object_copier.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/object_copier.rb @@ -28,13 +28,11 @@ def copy_object(source, target, options) options[:bucket] = target_bucket options[:key] = target_key options[:copy_source] = copy_source(source) - Aws::Plugins::UserAgent.metric('S3_TRANSFER') do - if options.delete(:multipart_copy) - apply_source_client(source, options) - ObjectMultipartCopier.new(@options).copy(options) - else - @object.client.copy_object(options) - end + if options.delete(:multipart_copy) + apply_source_client(source, options) + ObjectMultipartCopier.new(@options).copy(options) + else + 
@object.client.copy_object(options) end end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb index ee331a5a0f4..e0244d71593 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb @@ -95,8 +95,9 @@ def initialize(options = {}) # @see Client#head_object def download_file(destination, bucket:, key:, **options) downloader = FileDownloader.new(client: @client) - # TODO: wrap with user-agent metric tracking - downloader.download(destination, options.merge(bucket: bucket, key: key)) + Aws::Plugins::UserAgent.metric('S3_TRANSFER') do + downloader.download(destination, options.merge(bucket: bucket, key: key)) + end true end @@ -168,13 +169,11 @@ def download_file(destination, bucket:, key:, **options) # @see Client#complete_multipart_upload # @see Client#upload_part def upload_file(source, bucket:, key:, **options) - uploading_options = options.dup - uploader = FileUploader.new( - multipart_threshold: uploading_options.delete(:multipart_threshold), - client: @client - ) - # TODO: wrap with user-agent metric tracking - response = uploader.upload(source, uploading_options.merge(bucket: bucket, key: key)) + upload_opts = options.dup + uploader = FileUploader.new(multipart_threshold: upload_opts.delete(:multipart_threshold), client: @client) + response = Aws::Plugins::UserAgent.metric('S3_TRANSFER') do + uploader.upload(source, upload_opts.merge(bucket: bucket, key: key)) + end yield response if block_given? true end @@ -231,15 +230,16 @@ def upload_file(source, bucket:, key:, **options) # @see Client#complete_multipart_upload # @see Client#upload_part def upload_stream(bucket:, key:, **options, &block) - uploading_options = options.dup + upload_opts = options.dup uploader = MultipartStreamUploader.new( client: @client, - thread_count: uploading_options.delete(:thread_count), - tempfile: uploading_options.delete(:tempfile), - part_size: uploading_options.delete(:part_size) + thread_count: upload_opts.delete(:thread_count), + tempfile: upload_opts.delete(:tempfile), + part_size: upload_opts.delete(:part_size) ) - # TODO: wrap with user-agent metric tracking - uploader.upload(uploading_options.merge(bucket: bucket, key: key), &block) + Aws::Plugins::UserAgent.metric('S3_TRANSFER') do + uploader.upload(upload_opts.merge(bucket: bucket, key: key), &block) + end true end end From 58d11859276b195b6a42792173c2341d00024d80 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Fri, 15 Aug 2025 07:57:33 -0700 Subject: [PATCH 19/27] Feedback - reuse existing deprecation message --- .../lib/aws-sdk-s3/customizations/object.rb | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb index 213cc78e87a..3977605655d 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb @@ -8,15 +8,6 @@ class Object # Make the method redefinable alias_method :copy_from, :copy_from - # @api private - def self.deprecation_msg(method) - "#################### DEPRECATION WARNING ####################\n"\ - "Called deprecated method `#{method}` of #{self}.\n"\ - "Use method `#{method}` from Aws::S3::TransferManager instead.\n"\ - "#{self} support will be removed in next major version.\n"\ - '#############################################################' - end - # Copies another object to this object. 
Use `multipart_copy: true` # for large objects. This is required for objects that exceed 5GB. # @@ -404,7 +395,7 @@ def upload_stream(options = {}, &block) end true end - deprecated(:upload_stream, message: deprecation_msg(:upload_stream)) + deprecated(:upload_stream, use: `Aws::S3::TransferManager.new.upload_stream`, version: 'next major version') # Uploads a file from disk to the current object in S3. # @@ -472,7 +463,7 @@ def upload_file(source, options = {}) yield response if block_given? true end - deprecated(:upload_file, message: deprecation_msg(:upload_file)) + deprecated(:upload_file, use: 'Aws::S3::TransferManager.new.upload_file', version: 'next major version') # Downloads a file in S3 to a path on disk. # @@ -542,7 +533,7 @@ def download_file(destination, options = {}) end true end - deprecated(:download_file, message: deprecation_msg(:download_file)) + deprecated(:download_file, use: 'Aws::S3::TransferManager.new.download_file', version: 'next major version') class Collection < Aws::Resources::Collection alias_method :delete, :batch_delete! From 7df69d4569d2b15faaad7f53fdbdde4ca7629696 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Fri, 15 Aug 2025 08:42:19 -0700 Subject: [PATCH 20/27] Feedback - remove unnecessary api private tags --- gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb | 3 --- gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb | 1 - .../lib/aws-sdk-s3/multipart_file_uploader.rb | 13 ------------- .../lib/aws-sdk-s3/multipart_stream_uploader.rb | 10 +--------- 4 files changed, 1 insertion(+), 26 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb index c0492410d13..226a1172499 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb @@ -9,10 +9,7 @@ module S3 # @api private class FileDownloader - # @api private MIN_CHUNK_SIZE = 5 * 1024 * 1024 - - # @api private MAX_PARTS = 10_000 def initialize(options = {}) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb index f50b02bc525..2d88520386a 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb @@ -7,7 +7,6 @@ module S3 # @api private class FileUploader - # @api private DEFAULT_MULTIPART_THRESHOLD = 100 * 1024 * 1024 # @param [Hash] options diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb index bfb84f53a43..bcc05f1fc9e 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb @@ -8,25 +8,12 @@ module S3 # @api private class MultipartFileUploader - # @api private MIN_PART_SIZE = 5 * 1024 * 1024 # 5MB - - # @api private MAX_PARTS = 10_000 - - # @api private DEFAULT_THREAD_COUNT = 10 - - # @api private CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names) - - # @api private COMPLETE_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names) - - # @api private UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names) - - # @api private CHECKSUM_KEYS = Set.new( Client.api.operation(:upload_part).input.shape.members.map do |n, s| n if s.location == 'header' && s.location_name.start_with?('x-amz-checksum-') diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb 
b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb index d3486db2338..9a6e72a245f 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb @@ -9,19 +9,11 @@ module Aws module S3 # @api private class MultipartStreamUploader - # @api private - DEFAULT_PART_SIZE = 5 * 1024 * 1024 # 5MB - # @api private + DEFAULT_PART_SIZE = 5 * 1024 * 1024 # 5MB DEFAULT_THREAD_COUNT = 10 - - # @api private CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names) - - # @api private UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names) - - # @api private COMPLETE_UPLOAD_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names) # @option options [Client] :client From 08622f16f5df27d8f43357c3c9b216fc8584b98d Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Fri, 15 Aug 2025 09:42:21 -0700 Subject: [PATCH 21/27] Feedback - improve ordered parts in stream upload --- .../lib/aws-sdk-s3/multipart_stream_uploader.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb index 9a6e72a245f..2950bdaa2b8 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb @@ -170,7 +170,13 @@ def create_completed_part(resp, part) end def ordered_parts(parts) - parts.size.times.map { parts.pop }.sort_by { |part| part[:part_number] } + sorted = [] + until parts.empty? + part = parts.pop + index = sorted.bsearch_index { |p| p[:part_number] >= part[:part_number] } || sorted.size + sorted.insert(index, part) + end + sorted end def clear_body(body) From 9846340ef02e24c39e8e170c64ac6b6eaa19b183 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Fri, 15 Aug 2025 10:09:51 -0700 Subject: [PATCH 22/27] Revert "Update user agent tracking" This reverts commit 4784fd94b132cc47a6e61fd6e911c14e274fb489. --- .../lib/aws-sdk-s3/customizations/object.rb | 5 +++- .../lib/aws-sdk-s3/file_downloader.rb | 22 +++++++------- .../lib/aws-sdk-s3/file_uploader.rb | 14 +++++---- .../aws-sdk-s3/multipart_stream_uploader.rb | 8 +++-- .../lib/aws-sdk-s3/object_copier.rb | 12 ++++---- .../lib/aws-sdk-s3/transfer_manager.rb | 30 +++++++++---------- 6 files changed, 51 insertions(+), 40 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb index 3977605655d..123aa9d9adc 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb @@ -391,7 +391,10 @@ def upload_stream(options = {}, &block) part_size: uploading_options.delete(:part_size) ) Aws::Plugins::UserAgent.metric('RESOURCE_MODEL') do - uploader.upload(uploading_options.merge(bucket: bucket_name, key: key), &block) + uploader.upload( + uploading_options.merge(bucket: bucket_name, key: key), + &block + ) end true end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb index 226a1172499..536bbfa899b 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_downloader.rb @@ -29,16 +29,18 @@ def download(destination, options = {}) @params = options validate! 
- case @mode - when 'auto' then multipart_download - when 'single_request' then single_request - when 'get_range' - raise ArgumentError, 'In get_range mode, :chunk_size must be provided' unless @chunk_size - - resp = @client.head_object(@params) - multithreaded_get_by_ranges(resp.content_length, resp.etag) - else - raise ArgumentError, "Invalid mode #{@mode} provided, :mode should be single_request, get_range or auto" + Aws::Plugins::UserAgent.metric('S3_TRANSFER') do + case @mode + when 'auto' then multipart_download + when 'single_request' then single_request + when 'get_range' + raise ArgumentError, 'In get_range mode, :chunk_size must be provided' unless @chunk_size + + resp = @client.head_object(@params) + multithreaded_get_by_ranges(resp.content_length, resp.etag) + else + raise ArgumentError, "Invalid mode #{@mode} provided, :mode should be single_request, get_range or auto" + end end File.rename(@temp_path, @path) if @temp_path ensure diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb index 2d88520386a..587066551ea 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/file_uploader.rb @@ -35,12 +35,14 @@ def initialize(options = {}) # objects smaller than the multipart threshold. # @return [void] def upload(source, options = {}) - if File.size(source) >= @multipart_threshold - MultipartFileUploader.new(@options).upload(source, options) - else - # remove multipart parameters not supported by put_object - options.delete(:thread_count) - put_object(source, options) + Aws::Plugins::UserAgent.metric('S3_TRANSFER') do + if File.size(source) >= multipart_threshold + MultipartFileUploader.new(@options).upload(source, options) + else + # remove multipart parameters not supported by put_object + options.delete(:thread_count) + put_object(source, options) + end end end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb index 2950bdaa2b8..60a298c720b 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_stream_uploader.rb @@ -32,9 +32,11 @@ def initialize(options = {}) # @option options [Integer] :thread_count (DEFAULT_THREAD_COUNT) # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse def upload(options = {}, &block) - upload_id = initiate_upload(options) - parts = upload_parts(upload_id, options, &block) - complete_upload(upload_id, parts, options) + Aws::Plugins::UserAgent.metric('S3_TRANSFER') do + upload_id = initiate_upload(options) + parts = upload_parts(upload_id, options, &block) + complete_upload(upload_id, parts, options) + end end private diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/object_copier.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/object_copier.rb index ba3ddb302ca..f81b5c4192d 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/object_copier.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/object_copier.rb @@ -28,11 +28,13 @@ def copy_object(source, target, options) options[:bucket] = target_bucket options[:key] = target_key options[:copy_source] = copy_source(source) - if options.delete(:multipart_copy) - apply_source_client(source, options) - ObjectMultipartCopier.new(@options).copy(options) - else - @object.client.copy_object(options) + Aws::Plugins::UserAgent.metric('S3_TRANSFER') do + if options.delete(:multipart_copy) + apply_source_client(source, options) + ObjectMultipartCopier.new(@options).copy(options) + else + 
@object.client.copy_object(options) + end end end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb index e0244d71593..ee331a5a0f4 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb @@ -95,9 +95,8 @@ def initialize(options = {}) # @see Client#head_object def download_file(destination, bucket:, key:, **options) downloader = FileDownloader.new(client: @client) - Aws::Plugins::UserAgent.metric('S3_TRANSFER') do - downloader.download(destination, options.merge(bucket: bucket, key: key)) - end + # TODO: wrap with user-agent metric tracking + downloader.download(destination, options.merge(bucket: bucket, key: key)) true end @@ -169,11 +168,13 @@ def download_file(destination, bucket:, key:, **options) # @see Client#complete_multipart_upload # @see Client#upload_part def upload_file(source, bucket:, key:, **options) - upload_opts = options.dup - uploader = FileUploader.new(multipart_threshold: upload_opts.delete(:multipart_threshold), client: @client) - response = Aws::Plugins::UserAgent.metric('S3_TRANSFER') do - uploader.upload(source, upload_opts.merge(bucket: bucket, key: key)) - end + uploading_options = options.dup + uploader = FileUploader.new( + multipart_threshold: uploading_options.delete(:multipart_threshold), + client: @client + ) + # TODO: wrap with user-agent metric tracking + response = uploader.upload(source, uploading_options.merge(bucket: bucket, key: key)) yield response if block_given? true end @@ -230,16 +231,15 @@ def upload_file(source, bucket:, key:, **options) # @see Client#complete_multipart_upload # @see Client#upload_part def upload_stream(bucket:, key:, **options, &block) - upload_opts = options.dup + uploading_options = options.dup uploader = MultipartStreamUploader.new( client: @client, - thread_count: upload_opts.delete(:thread_count), - tempfile: upload_opts.delete(:tempfile), - part_size: upload_opts.delete(:part_size) + thread_count: uploading_options.delete(:thread_count), + tempfile: uploading_options.delete(:tempfile), + part_size: uploading_options.delete(:part_size) ) - Aws::Plugins::UserAgent.metric('S3_TRANSFER') do - uploader.upload(upload_opts.merge(bucket: bucket, key: key), &block) - end + # TODO: wrap with user-agent metric tracking + uploader.upload(uploading_options.merge(bucket: bucket, key: key), &block) true end end From 58130ee975e7523b8538551cfc5f0ce2747b845b Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Fri, 15 Aug 2025 10:11:10 -0700 Subject: [PATCH 23/27] Remove comments --- gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb index ee331a5a0f4..964aebbe7a7 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb @@ -95,7 +95,6 @@ def initialize(options = {}) # @see Client#head_object def download_file(destination, bucket:, key:, **options) downloader = FileDownloader.new(client: @client) - # TODO: wrap with user-agent metric tracking downloader.download(destination, options.merge(bucket: bucket, key: key)) true end @@ -173,7 +172,6 @@ def upload_file(source, bucket:, key:, **options) multipart_threshold: uploading_options.delete(:multipart_threshold), client: @client ) - # TODO: wrap with user-agent metric tracking response = uploader.upload(source, uploading_options.merge(bucket: bucket, 
key: key)) yield response if block_given? true @@ -238,7 +236,6 @@ def upload_stream(bucket:, key:, **options, &block) tempfile: uploading_options.delete(:tempfile), part_size: uploading_options.delete(:part_size) ) - # TODO: wrap with user-agent metric tracking uploader.upload(uploading_options.merge(bucket: bucket, key: key), &block) true end From 78cacb999f4661331ceb5f6b931e777656a7a0ba Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Fri, 15 Aug 2025 10:16:09 -0700 Subject: [PATCH 24/27] Fix syntax --- gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb index 123aa9d9adc..3bb47632099 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb @@ -398,7 +398,7 @@ def upload_stream(options = {}, &block) end true end - deprecated(:upload_stream, use: `Aws::S3::TransferManager.new.upload_stream`, version: 'next major version') + deprecated(:upload_stream, use: 'Aws::S3::TransferManager.new.upload_stream', version: 'next major version') # Uploads a file from disk to the current object in S3. # From c47a6efd9851ce3886ed9dc6ca9011e4d21cea80 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Mon, 18 Aug 2025 08:19:36 -0700 Subject: [PATCH 25/27] Feedback - update deprecated use method --- gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb index 3bb47632099..6409a0fef8b 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/customizations/object.rb @@ -398,7 +398,7 @@ def upload_stream(options = {}, &block) end true end - deprecated(:upload_stream, use: 'Aws::S3::TransferManager.new.upload_stream', version: 'next major version') + deprecated(:upload_stream, use: 'Aws::S3::TransferManager#upload_stream', version: 'next major version') # Uploads a file from disk to the current object in S3. # @@ -466,7 +466,7 @@ def upload_file(source, options = {}) yield response if block_given? true end - deprecated(:upload_file, use: 'Aws::S3::TransferManager.new.upload_file', version: 'next major version') + deprecated(:upload_file, use: 'Aws::S3::TransferManager#upload_file', version: 'next major version') # Downloads a file in S3 to a path on disk. # @@ -536,7 +536,7 @@ def download_file(destination, options = {}) end true end - deprecated(:download_file, use: 'Aws::S3::TransferManager.new.download_file', version: 'next major version') + deprecated(:download_file, use: 'Aws::S3::TransferManager#download_file', version: 'next major version') class Collection < Aws::Resources::Collection alias_method :delete, :batch_delete! 
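The three deprecations above redirect callers from the Object resource helpers to the new TransferManager entry points. As a quick sanity check of that migration path, a minimal sketch follows (bucket, key, and file names are illustrative, not taken from this series):

    require 'aws-sdk-s3'

    # Deprecated in the next major version:
    #   Aws::S3::Object.new('my-bucket', 'report.csv').upload_file('/tmp/report.csv')

    # Replacement introduced by this series:
    tm = Aws::S3::TransferManager.new(client: Aws::S3::Client.new)
    tm.upload_file('/tmp/report.csv', bucket: 'my-bucket', key: 'report.csv')
    tm.download_file('/tmp/report-copy.csv', bucket: 'my-bucket', key: 'report.csv')
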
From 36c428a4c6c72c5739fe71e83c7392eaac16f107 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Mon, 18 Aug 2025 08:30:30 -0700 Subject: [PATCH 26/27] Feedback - remove unnecessary specs --- .../spec/multipart_file_uploader_spec.rb | 9 --------- .../spec/multipart_stream_uploader_spec.rb | 16 ---------------- 2 files changed, 25 deletions(-) diff --git a/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb b/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb index 6369f61ba99..82e147986e3 100644 --- a/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb +++ b/gems/aws-sdk-s3/spec/multipart_file_uploader_spec.rb @@ -18,15 +18,6 @@ module S3 uploader = MultipartFileUploader.new expect(uploader.client).to be(client) end - - it 'sets a default thread count when not given' do - expect(subject.instance_variable_get(:@thread_count)).to be(MultipartFileUploader::DEFAULT_THREAD_COUNT) - end - - it 'sets a custom thread count' do - subject = MultipartFileUploader.new(client: client, thread_count: 3) - expect(subject.instance_variable_get(:@thread_count)).to be(3) - end end describe '#upload' do diff --git a/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb b/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb index fd74ab904e8..e7c2860f5a7 100644 --- a/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb +++ b/gems/aws-sdk-s3/spec/multipart_stream_uploader_spec.rb @@ -20,22 +20,6 @@ module S3 uploader = MultipartStreamUploader.new expect(uploader.client).to be(client) end - - it 'sets default configuration values when none provided' do - expect(subject.instance_variable_get(:@tempfile)).to be_nil - expect(subject.instance_variable_get(:@part_size)).to eq(MultipartStreamUploader::DEFAULT_PART_SIZE) - expect(subject.instance_variable_get(:@thread_count)).to eq(MultipartStreamUploader::DEFAULT_THREAD_COUNT) - end - - it 'sets provided configurations' do - ten_mb = one_mb * 10 - subject = MultipartStreamUploader.new(client: client, tempfile: true, part_size: ten_mb, thread_count: 1) - - expect(subject.client).to be(client) - expect(subject.instance_variable_get(:@tempfile)).to be(true) - expect(subject.instance_variable_get(:@part_size)).to eq(ten_mb) - expect(subject.instance_variable_get(:@thread_count)).to eq(1) - end end describe '#upload_stream', :jruby_flaky do From 05eacf2fc44eec15a9b44f2605b343c7b7c5e9c0 Mon Sep 17 00:00:00 2001 From: Juli Tera Date: Mon, 18 Aug 2025 12:55:34 -0700 Subject: [PATCH 27/27] Testing implementation --- .../lib/aws-sdk-s3/directory_uploader.rb | 91 ++++++++++++++----- .../lib/aws-sdk-s3/multipart_file_uploader.rb | 69 +++++++++++++- .../lib/aws-sdk-s3/transfer_manager.rb | 10 +- 3 files changed, 143 insertions(+), 27 deletions(-) diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb index 2b6e95937a2..a4e11dc5249 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/directory_uploader.rb @@ -11,6 +11,7 @@ class DirectoryUploader def initialize(options = {}) @client = options[:client] || Client.new @thread_count = options[:thread_count] || 10 + @executor = options[:executor] end # @return [Client] @@ -27,41 +28,83 @@ def upload(source, options = {}) @s3_delimiter = upload_opts.delete(:s3_delimiter) || '/' @filter_callback = upload_opts.delete(:filter_callback) || nil - uploader = FileUploader.new(multipart_threshold: upload_opts.delete(:multipart_threshold), client: @client) - - queue = SizedQueue.new(5) # TODO: random number + uploader = 
FileUploader.new(
+ multipart_threshold: upload_opts.delete(:multipart_threshold),
+ client: @client,
+ executor: @executor
+ )
+ @file_queue = SizedQueue.new(5) # TODO: random number for now, intended to relieve backpressure
 @disable_queue = false
+
 _producer = Thread.new do
 if @recursive
- stream_recursive_files(queue)
+ stream_recursive_files
+ else
+ stream_direct_files
+ end
+
+ # signal that the queue is done
+ if @executor
+ @file_queue << :done
 else
- stream_direct_files(queue)
+ @thread_count.times { @file_queue << :done }
 end
- @thread_count.times { queue << :done }
 end

- threads = []
- @thread_count.times do
- thread = Thread.new do
- while (file = queue.shift) != :done
- path = File.join(@source, file)
- # TODO: key to consider s3_prefix and custom delimiter
- uploader.upload(path, upload_opts.merge(key: file))
+ if @executor
+ upload_with_executor(uploader, upload_opts)
+ else
+ threads = []
+ @thread_count.times do
+ thread = Thread.new do
+ next if @disable_queue
+
+ while (file = @file_queue.shift) != :done
+ path = File.join(@source, file)
+ # TODO: key to consider s3_prefix and custom delimiter
+ uploader.upload(path, upload_opts.merge(key: file))
+ end
+ nil
+ rescue StandardError => e # TODO: handle failure policies
+ @disable_queue = true
+ e
 end
- nil
- rescue StandardError => e # TODO: handle failure policies
- @disable_queue = true
- queue.clear
- raise e
+ threads << thread
 end
- threads << thread
+ threads.map(&:value).compact
 end
- threads.map(&:value).compact
 end

 private

- def stream_recursive_files(queue)
+ def upload_with_executor(uploader, upload_opts)
+ total_files = 0
+ completion_queue = Queue.new
+ errors = []
+ while (file = @file_queue.shift) != :done
+ total_files += 1
+ @executor.post(file) do |f|
+ begin
+ next if @disable_queue
+
+ path = File.join(@source, f)
+ # TODO: key to consider s3_prefix and custom delimiter
+ uploader.upload(path, upload_opts.merge(key: f))
+ rescue StandardError => e # TODO: handle failure policies
+ @disable_queue = true
+ errors << e
+ end
+ ensure
+ completion_queue << :done
+ end
+ end
+ puts 'waiting for completion'
+ total_files.times { completion_queue.pop }
+ puts 'all done waiting!'
+ raise StandardError, 'directory upload failed' unless errors.empty? 
+ end
+
+ def stream_recursive_files
 visited = Set.new
 # TODO: add filter callback
 Find.find(@source) do |p|
 break if @disable_queue
@@ -81,11 +124,11 @@ def stream_recursive_files(queue)
 visited << absolute_path

 # TODO: if non-default s3_delimiter is used, validate here and fail
- queue << p.sub(%r{^#{Regexp.escape(@source)}/}, '') if File.file?(p)
+ @file_queue << p.sub(%r{^#{Regexp.escape(@source)}/}, '') if File.file?(p)
 end
 end

- def stream_direct_files(queue)
+ def stream_direct_files
 # TODO: add filter callback
 Dir.each_child(@source) do |entry|
 break if @disable_queue
@@ -94,7 +137,7 @@ def stream_direct_files(queue)
 next if !@follow_symlinks && File.symlink?(path)

 # TODO: if non-default s3_delimiter is used, validate here and fail
- queue << entry if File.file?(path)
+ @file_queue << entry if File.file?(path)
 end
 end
 end
diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb
index bcc05f1fc9e..fdd0732e73a 100644
--- a/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb
+++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/multipart_file_uploader.rb
@@ -24,6 +24,7 @@ class MultipartFileUploader
 # @option options [Integer] :thread_count (DEFAULT_THREAD_COUNT)
 def initialize(options = {})
 @client = options[:client] || Client.new
+ @executor = options[:executor]
 @thread_count = options[:thread_count] || DEFAULT_THREAD_COUNT
 end

@@ -66,7 +67,13 @@ def complete_upload(upload_id, parts, source, options)
 def upload_parts(upload_id, source, options)
 completed = PartList.new
 pending = PartList.new(compute_parts(upload_id, source, options))
- errors = upload_in_threads(pending, completed, options)
+ errors =
+ if @executor
+ puts "Executor route - using #{@executor}"
+ upload_in_executor(pending, completed, options)
+ else
+ upload_in_threads(pending, completed, options)
+ end
 if errors.empty?
 completed.to_a.sort_by { |part| part[:part_number] }
 else
@@ -135,6 +142,62 @@ def upload_part_opts(options)
 end
 end

+ def upload_in_executor(pending, completed, options)
+ if (callback = options[:progress_callback])
+ progress = MultipartProgress.new(pending, callback)
+ end
+ max_parts = pending.count
+ completion_queue = Queue.new
+ stop_work = false
+ errors = []
+ counter = 0
+
+ puts "Submitting #{max_parts} tasks"
+ while (part = pending.shift)
+ counter += 1
+ puts "Submitting #{counter} task to executor"
+ @executor.post(part) do |p|
+ if stop_work
+ puts 'Work stopped so skipping'
+ completion_queue << :done
+ next
+ end
+
+ if progress
+ p[:on_chunk_sent] =
+ proc do |_chunk, bytes, _total|
+ progress.call(p[:part_number], bytes)
+ end
+ end
+
+ begin
+ puts "Uploading #{p[:part_number]}"
+
+ resp = @client.upload_part(p)
+ p[:body].close
+ completed_part = { etag: resp.etag, part_number: p[:part_number] }
+ algorithm = resp.context.params[:checksum_algorithm]
+ k = "checksum_#{algorithm.downcase}".to_sym
+ completed_part[k] = resp.send(k)
+ completed.push(completed_part)
+ rescue StandardError => e
+ puts "Encountered Error #{e}"
+ stop_work = true
+ errors << e
+ ensure
+ puts 'Adding to completion queue'
+ completion_queue << :done
+ end
+ nil
+ end
+ end
+
+ puts "Waiting for #{counter} completion"
+ max_parts.times { completion_queue.pop }
+ puts "Done Waiting. 
Result: \n Completed:#{completed} \n Error: #{errors}" + errors + end + def upload_in_threads(pending, completed, options) threads = [] if (callback = options[:progress_callback]) @@ -189,6 +252,10 @@ def initialize(parts = []) @mutex = Mutex.new end + def count + @mutex.synchronize { @parts.count } + end + def push(part) @mutex.synchronize { @parts.push(part) } end diff --git a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb index 0586a476861..0fb509534d7 100644 --- a/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb +++ b/gems/aws-sdk-s3/lib/aws-sdk-s3/transfer_manager.rb @@ -18,11 +18,15 @@ class TransferManager # will be created automatically. def initialize(options = {}) @client = options.delete(:client) || Client.new + @executor = options.delete(:executor) end # @return [S3::Client] attr_reader :client + # @return [Object] executor + attr_reader :executor + # Downloads a file in S3 to a path on disk. # # # small files (< 5MB) are downloaded in a single API call @@ -170,7 +174,8 @@ def upload_file(source, bucket:, key:, **options) uploading_options = options.dup uploader = FileUploader.new( multipart_threshold: uploading_options.delete(:multipart_threshold), - client: @client + client: @client, + executor: @executor ) response = uploader.upload(source, uploading_options.merge(bucket: bucket, key: key)) yield response if block_given? @@ -244,7 +249,8 @@ def upload_directory(source, options = {}) upload_directory_opts = options.dup directory_uploader = DirectoryUploader.new( client: @client, - thread_count: upload_directory_opts.delete(:thread_count) + thread_count: upload_directory_opts.delete(:thread_count), + executor: @executor ) directory_uploader.upload(source, upload_directory_opts) true # TODO: need to change depending on failure policy set
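
The executor plumbing added in this last patch assumes only a duck type: any object responding to #post(task) { |task| ... } that runs the block asynchronously. Completion is tracked by the per-call completion queues, not by the executor itself. A minimal sketch of a conforming executor follows; SimpleExecutor is not part of the SDK, and something like concurrent-ruby's Concurrent::ThreadPoolExecutor, whose #post accepts arguments and a block the same way, could presumably be passed instead:

    require 'aws-sdk-s3'

    # Fixed-size worker pool implementing the #post(task, &block) contract
    # that DirectoryUploader and MultipartFileUploader call above.
    class SimpleExecutor
      def initialize(size = 4)
        @queue = Queue.new
        @threads = Array.new(size) do
          Thread.new do
            # A nil job is the shutdown sentinel; otherwise run the block with its task.
            while (job = @queue.pop)
              task, work = job
              work.call(task)
            end
          end
        end
      end

      def post(task, &block)
        @queue << [task, block]
      end

      def shutdown
        @threads.size.times { @queue << nil }
        @threads.each(&:join)
      end
    end

    # Usage sketch; option names follow the prototype, paths and bucket are illustrative.
    tm = Aws::S3::TransferManager.new(executor: SimpleExecutor.new(8))
    tm.upload_directory('/path/to/photos', bucket: 'my-bucket', recursive: true)
    tm.upload_file('/tmp/large.bin', bucket: 'my-bucket', key: 'large.bin')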