diff --git a/Sources/Hub/Downloader.swift b/Sources/Hub/Downloader.swift index f52c596..dd6a921 100644 --- a/Sources/Hub/Downloader.swift +++ b/Sources/Hub/Downloader.swift @@ -11,6 +11,7 @@ import Foundation class Downloader: NSObject, ObservableObject { private(set) var destination: URL + private(set) var sourceURL: URL private let chunkSize = 10 * 1024 * 1024 // 10MB @@ -24,25 +25,68 @@ class Downloader: NSObject, ObservableObject { enum DownloadError: Error { case invalidDownloadLocation case unexpectedError + case tempFileNotFound } private(set) lazy var downloadState: CurrentValueSubject = CurrentValueSubject(.notStarted) private var stateSubscriber: Cancellable? + + private(set) var tempFilePath: URL? + private(set) var expectedSize: Int? + private(set) var downloadedSize: Int = 0 private var urlSession: URLSession? = nil + + /// Creates the incomplete file path for a given destination URL + /// This is similar to the Hugging Face Hub approach of using .incomplete files + static func incompletePath(for destination: URL) -> URL { + destination.appendingPathExtension("incomplete") + } + + /// Check if an incomplete file exists for the destination and returns its size + /// - Parameter destination: The destination URL for the download + /// - Returns: Size of the incomplete file if it exists, otherwise 0 + static func checkForIncompleteFile(at destination: URL) -> Int { + let incompletePath = Self.incompletePath(for: destination) + + if FileManager.default.fileExists(atPath: incompletePath.path) { + if let attributes = try? FileManager.default.attributesOfItem(atPath: incompletePath.path), + let fileSize = attributes[.size] as? Int + { + return fileSize + } + } + + return 0 + } init( from url: URL, to destination: URL, using authToken: String? = nil, inBackground: Bool = false, - resumeSize: Int = 0, + resumeSize: Int = 0, // Can be specified manually, but will also check for incomplete files headers: [String: String]? = nil, expectedSize: Int? = nil, timeout: TimeInterval = 10, numRetries: Int = 5 ) { self.destination = destination + sourceURL = url + self.expectedSize = expectedSize + + // Create incomplete file path based on destination + tempFilePath = Downloader.incompletePath(for: destination) + + // If resume size wasn't specified, check for an existing incomplete file + let actualResumeSize: Int = if resumeSize > 0 { + resumeSize + } else { + Downloader.checkForIncompleteFile(at: destination) + } + + downloadedSize = actualResumeSize + super.init() let sessionIdentifier = "swift-transformers.hub.downloader" @@ -55,7 +99,7 @@ class Downloader: NSObject, ObservableObject { urlSession = URLSession(configuration: config, delegate: self, delegateQueue: nil) - setupDownload(from: url, with: authToken, resumeSize: resumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries) + setUpDownload(from: url, with: authToken, resumeSize: actualResumeSize, headers: headers, expectedSize: expectedSize, timeout: timeout, numRetries: numRetries) } /// Sets up and initiates a file download operation @@ -68,7 +112,7 @@ class Downloader: NSObject, ObservableObject { /// - expectedSize: Expected file size in bytes for validation /// - timeout: Time interval before the request times out /// - numRetries: Number of retry attempts for failed downloads - private func setupDownload( + private func setUpDownload( from url: URL, with authToken: String?, resumeSize: Int, @@ -77,59 +121,83 @@ class Downloader: NSObject, ObservableObject { timeout: TimeInterval, numRetries: Int ) { - downloadState.value = .downloading(0) urlSession?.getAllTasks { tasks in // If there's an existing pending background task with the same URL, let it proceed. if let existing = tasks.filter({ $0.originalRequest?.url == url }).first { switch existing.state { case .running: - // print("Already downloading \(url)") return case .suspended: - // print("Resuming suspended download task for \(url)") existing.resume() return - case .canceling: - // print("Starting new download task for \(url), previous was canceling") - break - case .completed: - // print("Starting new download task for \(url), previous is complete but the file is no longer present (I think it's cached)") - break + case .canceling, .completed: + existing.cancel() @unknown default: - // print("Unknown state for running task; cancelling and creating a new one") existing.cancel() } } - var request = URLRequest(url: url) - // Use headers from argument else create an empty header dictionary - var requestHeaders = headers ?? [:] - - // Populate header auth and range fields - if let authToken { - requestHeaders["Authorization"] = "Bearer \(authToken)" - } - if resumeSize > 0 { - requestHeaders["Range"] = "bytes=\(resumeSize)-" - } - - request.timeoutInterval = timeout - request.allHTTPHeaderFields = requestHeaders - Task { do { - // Create a temp file to write - let tempURL = FileManager.default.temporaryDirectory.appendingPathComponent(UUID().uuidString) - FileManager.default.createFile(atPath: tempURL.path, contents: nil) - let tempFile = try FileHandle(forWritingTo: tempURL) + // Check if incomplete file exists and get its size + var existingSize = 0 + guard let incompleteFilePath = self.tempFilePath else { + throw DownloadError.unexpectedError + } + + let fileManager = FileManager.default + if fileManager.fileExists(atPath: incompleteFilePath.path) { + let attributes = try fileManager.attributesOfItem(atPath: incompleteFilePath.path) + existingSize = attributes[.size] as? Int ?? 0 + self.downloadedSize = existingSize + } else { + // Create parent directory if needed + try fileManager.createDirectory(at: incompleteFilePath.deletingLastPathComponent(), withIntermediateDirectories: true) + + // Create empty incomplete file + fileManager.createFile(atPath: incompleteFilePath.path, contents: nil) + } + + // Set up the request with appropriate headers + var request = URLRequest(url: url) + var requestHeaders = headers ?? [:] + + if let authToken { + requestHeaders["Authorization"] = "Bearer \(authToken)" + } + + // Set Range header if we're resuming + if existingSize > 0 { + requestHeaders["Range"] = "bytes=\(existingSize)-" + + // Calculate and show initial progress + if let expectedSize, expectedSize > 0 { + let initialProgress = Double(existingSize) / Double(expectedSize) + self.downloadState.value = .downloading(initialProgress) + } else { + self.downloadState.value = .downloading(0) + } + } else { + self.downloadState.value = .downloading(0) + } + + request.timeoutInterval = timeout + request.allHTTPHeaderFields = requestHeaders + + // Open the incomplete file for writing + let tempFile = try FileHandle(forWritingTo: incompleteFilePath) + + // If resuming, seek to end of file + if existingSize > 0 { + try tempFile.seekToEnd() + } defer { tempFile.closeFile() } - try await self.httpGet(request: request, tempFile: tempFile, resumeSize: resumeSize, numRetries: numRetries, expectedSize: expectedSize) + try await self.httpGet(request: request, tempFile: tempFile, resumeSize: self.downloadedSize, numRetries: numRetries, expectedSize: expectedSize) // Clean up and move the completed download to its final destination tempFile.closeFile() - try FileManager.default.moveDownloadedFile(from: tempURL, to: self.destination) - + try fileManager.moveDownloadedFile(from: incompleteFilePath, to: self.destination) self.downloadState.value = .completed(self.destination) } catch { self.downloadState.value = .failed(error) @@ -169,15 +237,14 @@ class Downloader: NSObject, ObservableObject { // Start the download and get the byte stream let (asyncBytes, response) = try await session.bytes(for: newRequest) - guard let response = response as? HTTPURLResponse else { + guard let httpResponse = response as? HTTPURLResponse else { throw DownloadError.unexpectedError } - - guard (200..<300).contains(response.statusCode) else { + guard (200..<300).contains(httpResponse.statusCode) else { throw DownloadError.unexpectedError } - var downloadedSize = resumeSize + downloadedSize = resumeSize // Create a buffer to collect bytes before writing to disk var buffer = Data(capacity: chunkSize) @@ -218,7 +285,7 @@ class Downloader: NSObject, ObservableObject { try await httpGet( request: request, tempFile: tempFile, - resumeSize: downloadedSize, + resumeSize: self.downloadedSize, numRetries: newNumRetries - 1, expectedSize: expectedSize ) diff --git a/Sources/Hub/Hub.swift b/Sources/Hub/Hub.swift index fe8f461..0634b69 100644 --- a/Sources/Hub/Hub.swift +++ b/Sources/Hub/Hub.swift @@ -51,13 +51,13 @@ public extension Hub { } } - enum RepoType: String { + enum RepoType: String, Codable { case models case datasets case spaces } - - struct Repo { + + struct Repo: Codable { public let id: String public let type: RepoType diff --git a/Sources/Hub/HubApi.swift b/Sources/Hub/HubApi.swift index 368b805..9be1f7f 100644 --- a/Sources/Hub/HubApi.swift +++ b/Sources/Hub/HubApi.swift @@ -426,14 +426,44 @@ public extension HubApi { try prepareDestination() try prepareMetadataDestination() - let downloader = Downloader(from: source, to: destination, using: hfToken, inBackground: backgroundSession, expectedSize: remoteSize) + // Check for an existing incomplete file + let incompleteFile = Downloader.incompletePath(for: destination) + var resumeSize = 0 + + if FileManager.default.fileExists(atPath: incompleteFile.path) { + if let fileAttributes = try? FileManager.default.attributesOfItem(atPath: incompleteFile.path) { + resumeSize = (fileAttributes[FileAttributeKey.size] as? Int) ?? 0 + } + } + + let downloader = Downloader( + from: source, + to: destination, + using: hfToken, + inBackground: backgroundSession, + resumeSize: resumeSize, + expectedSize: remoteSize + ) + let downloadSubscriber = downloader.downloadState.sink { state in - if case let .downloading(progress) = state { + switch state { + case let .downloading(progress): progressHandler(progress) + case .completed, .failed, .notStarted: + break } } - _ = try withExtendedLifetime(downloadSubscriber) { - try downloader.waitUntilDone() + do { + _ = try withExtendedLifetime(downloadSubscriber) { + try downloader.waitUntilDone() + } + + try HubApi.shared.writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataPath: metadataDestination) + + return destination + } catch { + // If download fails, leave the incomplete file in place for future resume + throw error } try hub.writeDownloadMetadata(commitHash: remoteCommitHash, etag: remoteEtag, metadataPath: metadataDestination) diff --git a/Tests/HubTests/DownloaderTests.swift b/Tests/HubTests/DownloaderTests.swift index 2c6ed02..1ef7c0f 100644 --- a/Tests/HubTests/DownloaderTests.swift +++ b/Tests/HubTests/DownloaderTests.swift @@ -168,4 +168,28 @@ final class DownloaderTests: XCTestCase { throw error } } + + func testAutomaticIncompleteFileDetection() throws { + let url = URL(string: "https://huggingface.co/coreml-projects/sam-2-studio/resolve/main/SAM%202%20Studio%201.1.zip")! + let destination = tempDir.appendingPathComponent("SAM%202%20Studio%201.1.zip") + + // Create a sample incomplete file with test content + let incompletePath = Downloader.incompletePath(for: destination) + try FileManager.default.createDirectory(at: incompletePath.deletingLastPathComponent(), withIntermediateDirectories: true) + let testContent = Data(repeating: 65, count: 1024) // 1KB of data + FileManager.default.createFile(atPath: incompletePath.path, contents: testContent) + + // Create a downloader for the same destination + // It should automatically detect and use the incomplete file + let downloader = Downloader( + from: url, + to: destination + ) + + // Verify the downloader found and is using the incomplete file + XCTAssertEqual(downloader.downloadedSize, 1024, "Should have detected the incomplete file and set resumeSize") + + // Clean up + try? FileManager.default.removeItem(at: incompletePath) + } }