diff --git a/Package.swift b/Package.swift index faee4068..ae497619 100644 --- a/Package.swift +++ b/Package.swift @@ -41,6 +41,12 @@ let package = Package( "Anthropic" ] ), + .library( + name: "ElevenLabs", + targets: [ + "ElevenLabs" + ] + ), .library( name: "HumeAI", targets: [ @@ -92,6 +98,7 @@ let package = Package( ], dependencies: [ .package(url: "https://github.com/vmanot/CorePersistence.git", branch: "main"), + .package(url: "https://github.com/vmanot/Media", branch: "main"), .package(url: "https://github.com/vmanot/Merge.git", branch: "master"), .package(url: "https://github.com/vmanot/NetworkKit.git", branch: "master"), .package(url: "https://github.com/vmanot/Swallow.git", branch: "master"), @@ -115,7 +122,7 @@ let package = Package( "Merge", "NetworkKit", "Swallow", - "SwiftUIX", + "SwiftUIX" ], path: "Sources/LargeLanguageModels", resources: [ @@ -191,7 +198,8 @@ let package = Package( "LargeLanguageModels", "Merge", "NetworkKit", - "Swallow" + "Swallow", + "Media" ], path: "Sources/_Gemini", swiftSettings: [ @@ -392,6 +400,7 @@ let package = Package( "Ollama", "OpenAI", "Swallow", + "NeetsAI", ], path: "Sources/AI", swiftSettings: [ @@ -563,6 +572,17 @@ let package = Package( swiftSettings: [ .enableExperimentalFeature("AccessLevelOnImport") ] + ), + .testTarget( + name: "TogetherAITests", + dependencies: [ + "AI", + "Swallow" + ], + path: "Tests/TogetherAI", + swiftSettings: [ + .enableExperimentalFeature("AccessLevelOnImport") + ] ) ] ) diff --git a/Sources/AI/AnySpeechSynthesisRequestHandling.swift b/Sources/AI/AnySpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..f6f82af9 --- /dev/null +++ b/Sources/AI/AnySpeechSynthesisRequestHandling.swift @@ -0,0 +1,44 @@ +// +// AnySpeechSynthesisRequestHandling.swift +// AI +// +// Created by Jared Davidson on 1/14/25. +// + +import ElevenLabs +import LargeLanguageModels +import NeetsAI + +// FIXME: - (@archetapp) Is this the best place to put this file? 
+ +public struct AnySpeechSynthesisRequestHandling: Hashable { + private let _hashValue: Int + + public let base: any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling + + public var displayName: String { + switch base { + case is ElevenLabs.Client: + return "ElevenLabs" + case is NeetsAI.Client: + return "NeetsAI" + default: + fatalError() + } + } + + public init( + _ base: any CoreMI._ServiceClientProtocol & SpeechSynthesisRequestHandling + ) { + self.base = base + self._hashValue = ObjectIdentifier(base as AnyObject).hashValue + } + + public static func == (lhs: AnySpeechSynthesisRequestHandling, rhs: AnySpeechSynthesisRequestHandling) -> Bool { + lhs._hashValue == rhs._hashValue + } + + public func hash(into hasher: inout Hasher) { + hasher.combine(_hashValue) + } +} diff --git a/Sources/CoreMI/Intramodular/Foundation/CoreMI.RequestHandling.swift b/Sources/CoreMI/Intramodular/Foundation/CoreMI.RequestHandling.swift index 2596056a..b8ac0e05 100644 --- a/Sources/CoreMI/Intramodular/Foundation/CoreMI.RequestHandling.swift +++ b/Sources/CoreMI/Intramodular/Foundation/CoreMI.RequestHandling.swift @@ -3,6 +3,7 @@ // import Foundation +import SwallowMacrosClient extension CoreMI { /// A type that handles generative machine learning requests. @@ -12,7 +13,10 @@ extension CoreMI { /// `nil` if unknown. var _availableModels: [ModelIdentifier]? 
{ get } - func consider(_ request: R) async throws -> CoreMI.RequestConsideration + @__unused_method + func consider( + _ request: R + ) async throws -> CoreMI.RequestConsideration func perform( _ request: Request, @@ -36,7 +40,7 @@ extension CoreMI.RequestHandling { _ request: Request, returning resultType: Result.Type ) async throws -> Result { - fatalError() + fatalError(.unimplemented) } } diff --git a/Sources/CoreMI/Intramodular/Model Identifier/ModelIdentifier.Provider.swift b/Sources/CoreMI/Intramodular/Model Identifier/ModelIdentifier.Provider.swift index 1d3b94ce..5308cb79 100644 --- a/Sources/CoreMI/Intramodular/Model Identifier/ModelIdentifier.Provider.swift +++ b/Sources/CoreMI/Intramodular/Model Identifier/ModelIdentifier.Provider.swift @@ -28,72 +28,9 @@ extension ModelIdentifier { case _Rime case _HumeAI case _NeetsAI + case _xAI case unknown(String) - - public static var apple: Self { - Self._Apple - } - - public static var fal: Self { - self._Fal - } - - public static var openAI: Self { - Self._OpenAI - } - - public static var anthropic: Self { - Self._Anthropic - } - - public static var groq: Self { - Self._Groq - } - - public static var gemini: Self { - Self._Gemini - } - - public static var perplexity: Self { - Self._Perplexity - } - - public static var jina: Self { - Self._Jina - } - - public static var voyageAI: Self { - Self._VoyageAI - } - - public static var cohere: Self { - Self._Cohere - } - - public static var elevenLabs: Self { - Self._ElevenLabs - } - - public static var togetherAI: Self { - Self._TogetherAI - } - - public static var playHT: Self { - Self._PlayHT - } - - public static var rime: Self { - Self._Rime - } - - public static var humeAI: Self { - self._HumeAI - } - - public static var neetsAI: Self { - self._NeetsAI - } } } @@ -138,6 +75,8 @@ extension ModelIdentifier.Provider: CustomStringConvertible { return "HumeAI" case ._NeetsAI: return "NeetsAI" + case ._xAI: + return "xAI" case .unknown(let provider): return provider } 
@@ -151,6 +90,8 @@ extension ModelIdentifier.Provider: RawRepresentable { return "anthropic" case ._Apple: return "apple" + case ._Cohere: + return "cohere" case ._Fal: return "fal" case ._Mistral: @@ -169,8 +110,6 @@ extension ModelIdentifier.Provider: RawRepresentable { return "jina" case ._VoyageAI: return "voyageAI" - case ._Cohere: - return "cohere" case ._ElevenLabs: return "elevenlabs" case ._TogetherAI: @@ -183,6 +122,8 @@ extension ModelIdentifier.Provider: RawRepresentable { return "humeAI" case ._NeetsAI: return "neetsAI" + case ._xAI: + return "xAI" case .unknown(let provider): return provider } @@ -194,36 +135,38 @@ extension ModelIdentifier.Provider: RawRepresentable { self = ._Anthropic case Self._Apple.rawValue: self = ._Apple + case Self._Cohere.rawValue: + self = ._Cohere case Self._Fal.rawValue: self = ._Fal - case Self._Mistral.rawValue: - self = ._Mistral + case Self._ElevenLabs.rawValue: + self = ._ElevenLabs case Self._Groq.rawValue: self = ._Groq case Self._OpenAI.rawValue: self = ._OpenAI case Self._Gemini.rawValue: self = ._Gemini - case Self._Perplexity.rawValue: - self = ._Perplexity + case Self._HumeAI.rawValue: + self = ._HumeAI case Self._Jina.rawValue: self = ._Jina - case Self._VoyageAI.rawValue: - self = ._VoyageAI - case Self._Cohere.rawValue: - self = ._Cohere - case Self._ElevenLabs.rawValue: - self = ._ElevenLabs - case Self._TogetherAI.rawValue: - self = ._TogetherAI + case Self._Mistral.rawValue: + self = ._Mistral + case Self._NeetsAI.rawValue: + self = ._NeetsAI + case Self._Perplexity.rawValue: + self = ._Perplexity case Self._PlayHT.rawValue: self = ._PlayHT case Self._Rime.rawValue: self = ._Rime - case Self._HumeAI.rawValue: - self = ._HumeAI - case Self._NeetsAI.rawValue: - self = ._NeetsAI + case Self._TogetherAI.rawValue: + self = ._TogetherAI + case Self._VoyageAI.rawValue: + self = ._VoyageAI + case Self._xAI.rawValue: + self = ._xAI default: self = .unknown(rawValue) } @@ -239,3 +182,71 @@ extension 
ModelIdentifier.Provider: Codable { try rawValue.encode(to: encoder) } } + +// MARK: - Supplementary + +extension ModelIdentifier.Provider { + public static var apple: Self { + Self._Apple + } + + public static var fal: Self { + self._Fal + } + + public static var openAI: Self { + Self._OpenAI + } + + public static var anthropic: Self { + Self._Anthropic + } + + public static var groq: Self { + Self._Groq + } + + public static var gemini: Self { + Self._Gemini + } + + public static var perplexity: Self { + Self._Perplexity + } + + public static var jina: Self { + Self._Jina + } + + public static var voyageAI: Self { + Self._VoyageAI + } + + public static var cohere: Self { + Self._Cohere + } + + public static var elevenLabs: Self { + Self._ElevenLabs + } + + public static var togetherAI: Self { + Self._TogetherAI + } + + public static var playHT: Self { + Self._PlayHT + } + + public static var rime: Self { + Self._Rime + } + + public static var humeAI: Self { + self._HumeAI + } + + public static var neetsAI: Self { + self._NeetsAI + } +} diff --git a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.RequestBodies.swift b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.RequestBodies.swift index 67b4f4e9..1ab5be7b 100644 --- a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.RequestBodies.swift +++ b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.RequestBodies.swift @@ -10,199 +10,380 @@ import SwiftAPI import Merge extension ElevenLabs.APISpecification { - public enum RequestBodies { - - } -} - -extension ElevenLabs.APISpecification.RequestBodies { - public struct SpeechRequest: Codable, Hashable, Equatable { - public let text: String - public let voiceSettings: ElevenLabs.VoiceSettings - public let model: ElevenLabs.Model - - private enum CodingKeys: String, CodingKey { - case text - case voiceSettings = "voice_settings" - case model = "model_id" - } - - public init( - text: String, - voiceSettings: 
ElevenLabs.VoiceSettings, - model: ElevenLabs.Model - ) { - self.text = text - self.voiceSettings = voiceSettings - self.model = model - } - } - - public struct TextToSpeechInput: Codable, Hashable { - public let voiceId: String - public let requestBody: SpeechRequest - - public init(voiceId: String, requestBody: SpeechRequest) { - self.voiceId = voiceId - self.requestBody = requestBody + enum RequestBodies { + public struct SpeechRequest: Codable, Hashable, Equatable { + public let text: String + public let languageCode: String? + public let voiceSettings: ElevenLabs.VoiceSettings + public let model: ElevenLabs.Model + + private enum CodingKeys: String, CodingKey { + case text + case voiceSettings = "voice_settings" + case model = "model_id" + case languageCode = "language_code" + } + + public init( + text: String, + languageCode: String?, + voiceSettings: ElevenLabs.VoiceSettings, + model: ElevenLabs.Model + ) { + self.text = text + self.languageCode = languageCode + self.voiceSettings = voiceSettings + self.model = model + } } - } - - public struct SpeechToSpeechInput: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible, Equatable { - public let voiceId: String - public let audioURL: URL - public let model: ElevenLabs.Model - public let voiceSettings: ElevenLabs.VoiceSettings - public init( - voiceId: String, - audioURL: URL, - model: ElevenLabs.Model, - voiceSettings: ElevenLabs.VoiceSettings - ) { - self.voiceId = voiceId - self.audioURL = audioURL - self.model = model - self.voiceSettings = voiceSettings + public struct TextToSpeechInput: Codable, Hashable { + public let voiceId: String + public let requestBody: SpeechRequest + + public init(voiceId: String, requestBody: SpeechRequest) { + self.voiceId = voiceId + self.requestBody = requestBody + } } - public func __conversion() throws -> HTTPRequest.Multipart.Content { - var result = HTTPRequest.Multipart.Content() + public struct SpeechToSpeechInput: Codable, Hashable, 
HTTPRequest.Multipart.ContentConvertible, Equatable { + public let voiceId: String + public let audioURL: URL + public let languageCode: String? + public let model: ElevenLabs.Model + public let voiceSettings: ElevenLabs.VoiceSettings - result.append( - .text( - named: "model_id", - value: model.rawValue - ) - ) + public init( + voiceId: String, + audioURL: URL, + languageCode: String?, + model: ElevenLabs.Model, + voiceSettings: ElevenLabs.VoiceSettings + ) { + self.voiceId = voiceId + self.audioURL = audioURL + self.languageCode = languageCode + self.model = model + self.voiceSettings = voiceSettings + } - let encoder = JSONEncoder() - encoder.keyEncodingStrategy = .convertToSnakeCase - if let voiceSettingsData = try? encoder.encode(voiceSettings), - let voiceSettingsString = String( - data: voiceSettingsData, - encoding: .utf8 - ) { + public func __conversion() throws -> HTTPRequest.Multipart.Content { + var result = HTTPRequest.Multipart.Content() + result.append( .text( - named: "voice_settings", - value: voiceSettingsString + named: "model_id", + value: model.rawValue ) ) + + if let languageCode { + result.append( + .text( + named: "language_code", + value: languageCode + ) + ) + } + + let encoder = JSONEncoder() + encoder.keyEncodingStrategy = .convertToSnakeCase + if let voiceSettingsData = try? encoder.encode(voiceSettings), + let voiceSettingsString = String( + data: voiceSettingsData, + encoding: .utf8 + ) { + result.append( + .text( + named: "voice_settings", + value: voiceSettingsString + ) + ) + } + + if let fileData = try? 
Data(contentsOf: audioURL) { + result.append( + .file( + named: "audio", + data: fileData, + filename: audioURL.lastPathComponent, + contentType: .mpeg + ) + ) + } + + return result + } + } + + public struct AddVoiceInput: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible, Equatable { + public let name: String + public let description: String + public let fileURL: URL + + public init( + name: String, + description: String, + fileURL: URL + ) { + self.name = name + self.description = description + self.fileURL = fileURL } - if let fileData = try? Data(contentsOf: audioURL) { + public func __conversion() throws -> HTTPRequest.Multipart.Content { + var result = HTTPRequest.Multipart.Content() + result.append( - .file( - named: "audio", - data: fileData, - filename: audioURL.lastPathComponent, - contentType: .mpeg + .text( + named: "name", + value: name ) ) + + result.append( + .text( + named: "description", + value: description + ) + ) + + if let fileData = try? Data(contentsOf: fileURL) { + result.append( + .file( + named: "files", + data: fileData, + filename: fileURL.lastPathComponent, + contentType: .m4a + ) + ) + } + + return result } - - return result - } - } - - public struct AddVoiceInput: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible, Equatable { - public let name: String - public let description: String - public let fileURL: URL - - public init( - name: String, - description: String, - fileURL: URL - ) { - self.name = name - self.description = description - self.fileURL = fileURL } - public func __conversion() throws -> HTTPRequest.Multipart.Content { - var result = HTTPRequest.Multipart.Content() - - result.append( - .text( - named: "name", - value: name - ) - ) + public struct EditVoiceInput: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible, Equatable { + public let voiceId: String + public let name: String + public let description: String? + public let fileURL: URL? 
- result.append( - .text( - named: "description", - value: description - ) - ) + public init( + voiceId: String, + name: String, + description: String? = nil, + fileURL: URL? = nil + ) { + self.voiceId = voiceId + self.name = name + self.description = description + self.fileURL = fileURL + } - if let fileData = try? Data(contentsOf: fileURL) { + public func __conversion() throws -> HTTPRequest.Multipart.Content { + var result = HTTPRequest.Multipart.Content() + result.append( - .file( - named: "files", - data: fileData, - filename: fileURL.lastPathComponent, - contentType: .m4a + .text( + named: "name", + value: name ) ) + + if let description = description { + result.append( + .text( + named: "description", + value: description + ) + ) + } + + if let fileURL = fileURL, + let fileData = try? Data(contentsOf: fileURL) { + result.append( + .file( + named: "files", + data: fileData, + filename: fileURL.lastPathComponent, + contentType: .m4a + ) + ) + } + + return result } - - return result } - } - - public struct EditVoiceInput: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible, Equatable { - public let voiceId: String - public let name: String - public let description: String? - public let fileURL: URL? - public init( - voiceId: String, - name: String, - description: String? = nil, - fileURL: URL? = nil - ) { - self.voiceId = voiceId - self.name = name - self.description = description - self.fileURL = fileURL + public struct DubbingRequest: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible { + public let name: String? + public let sourceURL: URL? + public let sourceLang: String? + public let targetLang: String + public let numSpeakers: Int? + public let watermark: Bool? + public let startTime: Int? + public let endTime: Int? + public let highestResolution: Bool? + public let dropBackgroundAudio: Bool? + public let useProfanityFilter: Bool? + public let fileData: Data? + + public init( + name: String? = nil, + sourceURL: URL? 
= nil, + sourceLang: String? = nil, + targetLang: String, + numSpeakers: Int? = nil, + watermark: Bool? = nil, + startTime: Int? = nil, + endTime: Int? = nil, + highestResolution: Bool? = nil, + dropBackgroundAudio: Bool? = nil, + useProfanityFilter: Bool? = nil, + fileData: Data? = nil + ) { + self.name = name + self.sourceURL = sourceURL + self.sourceLang = sourceLang + self.targetLang = targetLang + self.numSpeakers = numSpeakers + self.watermark = watermark + self.startTime = startTime + self.endTime = endTime + self.highestResolution = highestResolution + self.dropBackgroundAudio = dropBackgroundAudio + self.useProfanityFilter = useProfanityFilter + self.fileData = fileData + } + + public func __conversion() throws -> HTTPRequest.Multipart.Content { + var result = HTTPRequest.Multipart.Content() + + if let name { + result.append(.text(named: "name", value: name)) + } + + if let sourceURL { + result.append(.text(named: "source_url", value: sourceURL.absoluteString)) + } + + if let sourceLang { + result.append(.text(named: "source_lang", value: sourceLang)) + } + + result.append(.text(named: "target_lang", value: targetLang)) + + if let numSpeakers { + result.append(.text(named: "num_speakers", value: String(numSpeakers))) + } + + if let watermark { + result.append(.text(named: "watermark", value: String(watermark))) + } + + if let startTime { + result.append(.text(named: "start_time", value: String(startTime))) + } + + if let endTime { + result.append(.text(named: "end_time", value: String(endTime))) + } + + if let highestResolution { + result.append(.text(named: "highest_resolution", value: String(highestResolution))) + } + + if let dropBackgroundAudio { + result.append(.text(named: "drop_background_audio", value: String(dropBackgroundAudio))) + } + + if let useProfanityFilter { + result.append(.text(named: "use_profanity_filter", value: String(useProfanityFilter))) + } + + if let fileData { + result.append( + .file( + named: "file", + data: fileData, + 
filename: "input.mp4", + contentType: .mp4 + ) + ) + } + + return result + } } - - public func __conversion() throws -> HTTPRequest.Multipart.Content { - var result = HTTPRequest.Multipart.Content() + public struct DubbingInput: Codable, Hashable, HTTPRequest.Multipart.ContentConvertible { + public let voiceId: String + public let audioURL: URL + public let languageCode: String + public let model: ElevenLabs.Model + public let voiceSettings: ElevenLabs.VoiceSettings - result.append( - .text( - named: "name", - value: name - ) - ) + public init( + voiceId: String, + audioURL: URL, + languageCode: String, + model: ElevenLabs.Model, + voiceSettings: ElevenLabs.VoiceSettings + ) { + self.voiceId = voiceId + self.audioURL = audioURL + self.languageCode = languageCode + self.model = model + self.voiceSettings = voiceSettings + } - if let description = description { + public func __conversion() throws -> HTTPRequest.Multipart.Content { + var result = HTTPRequest.Multipart.Content() + result.append( .text( - named: "description", - value: description + named: "model_id", + value: model.rawValue ) ) - } - - if let fileURL = fileURL, - let fileData = try? Data(contentsOf: fileURL) { + result.append( - .file( - named: "files", - data: fileData, - filename: fileURL.lastPathComponent, - contentType: .m4a + .text( + named: "language_code", + value: languageCode ) ) + + let encoder = JSONEncoder() + encoder.keyEncodingStrategy = .convertToSnakeCase + if let voiceSettingsData = try? encoder.encode(voiceSettings), + let voiceSettingsString = String( + data: voiceSettingsData, + encoding: .utf8 + ) { + result.append( + .text( + named: "voice_settings", + value: voiceSettingsString + ) + ) + } + + if let fileData = try? 
Data(contentsOf: audioURL) { + result.append( + .file( + named: "audio", + data: fileData, + filename: audioURL.lastPathComponent, + contentType: .mpeg + ) + ) + } + + return result } - - return result } } } diff --git a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.ResponseBodies.swift b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.ResponseBodies.swift index b80abbd5..658d8de4 100644 --- a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.ResponseBodies.swift +++ b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.ResponseBodies.swift @@ -16,5 +16,28 @@ extension ElevenLabs.APISpecification { public struct VoiceID: Codable { public let voiceId: String } + + public struct DubbingResponse: Codable { + public let dubbingId: String + public let expectedDurationSec: Double + } + + public struct DubbingStatus: Codable { + public enum State: String, Codable { + case processing + case completed + case failed + } + + public let state: State + public let failure_reason: String? + public let progress: Double? 
+    }
+
+    public struct DubbingProgress: Codable {
+        public let status: DubbingStatus
+        public let expectedDuration: TimeInterval
+        public let dubbingId: String
+    }
   }
 }
diff --git a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift
index fa442c34..bcb95454 100644
--- a/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift
+++ b/Sources/ElevenLabs/Intramodular/API/ElevenLabs.APISpecification.swift
@@ -94,6 +94,24 @@ extension ElevenLabs {
                 "/v1/voices/\(context.input)"
             })
             var deleteVoice = Endpoint()
+
+            // Dubbing
+            @POST
+            @Path("/v1/dubbing")
+            @Body(multipart: .input)
+            var initiateDubbing = Endpoint()
+
+            @GET
+            @Path({ context -> String in
+                "/v1/dubbing/\(context.input)/status"
+            })
+            var getDubbingStatus = Endpoint()
+
+            @GET
+            @Path({ context -> String in
+                "/v1/dubbing/\(context.input)"
+            })
+            var getDubbingResult = Endpoint()
     }
 }

@@ -128,9 +146,6 @@ extension ElevenLabs.APISpecification {
             context: DecodeOutputContext
         ) throws -> Output {
             do {
-                if Input.self == RequestBodies.EditVoiceInput.self {
-                    print("TEsts")
-                }
                 try response.validate()
             } catch {
                 let apiError: Error
diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.Client+Dubbing.swift b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client+Dubbing.swift
new file mode 100644
index 00000000..35ca928c
--- /dev/null
+++ b/Sources/ElevenLabs/Intramodular/ElevenLabs.Client+Dubbing.swift
@@ -0,0 +1,83 @@
+//
+//  ElevenLabs.Client+Dubbing.swift
+//  AI
+//
+//  Created by Jared Davidson on 1/7/25.
+//
+
+import Foundation
+
+extension ElevenLabs.Client {
+    public func dub(
+        fileData: Data? = nil,
+        sourceURL: URL? = nil,
+        name: String? = nil,
+        sourceLang: String? = nil,
+        targetLang: String,
+        numSpeakers: Int?
= nil, + options: DubbingOptions = .init(), + progress: @escaping (DubbingProgress) async -> Void + ) async throws -> DubbingResult { + guard fileData != nil || sourceURL != nil else { + throw NSError(domain: "ElevenLabs", code: -1, userInfo: [ + NSLocalizedDescriptionKey: "Either fileData or sourceURL must be provided" + ]) + } + + let request = ElevenLabs.APISpecification.RequestBodies.DubbingRequest( + name: name, + sourceURL: sourceURL, + sourceLang: sourceLang, + targetLang: targetLang, + numSpeakers: numSpeakers, + watermark: options.watermark, + startTime: options.startTime, + endTime: options.endTime, + highestResolution: options.highestResolution, + dropBackgroundAudio: options.dropBackgroundAudio, + useProfanityFilter: options.useProfanityFilter, + fileData: fileData + ) + + // Start dubbing process + let response = try await run(\.initiateDubbing, with: request) + let dubbingId = response.dubbingId + let expectedDuration = response.expectedDurationSec + + // Poll for status + let pollingInterval: TimeInterval = 5 // seconds + let maxAttempts = Int(ceil(expectedDuration / pollingInterval)) + 10 // Add some buffer attempts + + for _ in 0.. 
[ElevenLabs.Voice] { try await run(\.listVoices).voices @@ -42,27 +66,30 @@ extension ElevenLabs.Client { public func speech( for text: String, voiceID: String, + languageCode: String?, voiceSettings: ElevenLabs.VoiceSettings, model: ElevenLabs.Model ) async throws -> Data { let requestBody = ElevenLabs.APISpecification.RequestBodies.SpeechRequest( text: text, + languageCode: languageCode, voiceSettings: voiceSettings, model: model ) - return try await run(\.textToSpeech, with: .init(voiceId: voiceID, requestBody: requestBody)) } public func speechToSpeech( inputAudioURL: URL, voiceID: String, + languageCode: String?, voiceSettings: ElevenLabs.VoiceSettings, model: ElevenLabs.Model ) async throws -> Data { let input = ElevenLabs.APISpecification.RequestBodies.SpeechToSpeechInput( voiceId: voiceID, audioURL: inputAudioURL, + languageCode: languageCode, model: model, voiceSettings: voiceSettings ) @@ -107,3 +134,79 @@ extension ElevenLabs.Client { try await run(\.deleteVoice, with: voice.rawValue) } } + +// MARK: - Conformances + +extension ElevenLabs.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + return try await self.availableVoices().map({try $0.__conversion()}) + } + + public func speech( + for text: String, + voiceID: String, + voiceSettings: AbstractVoiceSettings, + model: String + ) async throws -> Data { + try await self.speech( + for: text, + voiceID: voiceID, + languageCode: nil, + voiceSettings: .init(settings: voiceSettings), + model: ElevenLabs.Model(rawValue: model).unwrap() + ) + } + + public func speechToSpeech( + inputAudioURL: URL, + voiceID: String, + voiceSettings: AbstractVoiceSettings, + model: String + ) async throws -> Data { + try await self.speechToSpeech( + inputAudioURL: inputAudioURL, + voiceID: voiceID, + languageCode: nil, + voiceSettings: .init(settings: voiceSettings), + model: ElevenLabs.Model(rawValue: model).unwrap() + ) + } + + public func upload( + voiceWithName name: 
String, + description: String, + fileURL: URL + ) async throws -> AbstractVoice.ID { + let voice: ElevenLabs.Voice.ID = try await self.upload( + voiceWithName: name, + description: description, + fileURL: fileURL + ) + + return .init(rawValue: voice.rawValue) + } + + public func edit( + voice: AbstractVoice.ID, + name: String, + description: String, + fileURL: URL? + ) async throws -> Bool { + try await self.edit( + voice: ElevenLabs.Voice.ID(rawValue: voice.rawValue), + name: name, + description: description, + fileURL: fileURL + ) + } + + public func delete( + voice: AbstractVoice.ID + ) async throws { + try await self.delete( + voice: ElevenLabs.Voice.ID( + rawValue: voice.rawValue + ) + ) + } +} diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.Model.swift b/Sources/ElevenLabs/Intramodular/ElevenLabs.Model.swift index 6d181027..ca418154 100644 --- a/Sources/ElevenLabs/Intramodular/ElevenLabs.Model.swift +++ b/Sources/ElevenLabs/Intramodular/ElevenLabs.Model.swift @@ -20,6 +20,8 @@ extension ElevenLabs { case MultilingualV1 = "eleven_multilingual_v1" case EnglishSTSV2 = "eleven_english_sts_v2" + + case FlashV2_5 = "eleven_flash_v2_5" } } @@ -33,7 +35,7 @@ extension ElevenLabs.Model: CustomStringConvertible { extension ElevenLabs.Model: ModelIdentifierRepresentable { public init(from identifier: ModelIdentifier) throws { - guard identifier.provider == ._Groq, identifier.revision == nil else { + guard identifier.provider == ._ElevenLabs, identifier.revision == nil else { throw Never.Reason.illegal } @@ -46,7 +48,7 @@ extension ElevenLabs.Model: ModelIdentifierRepresentable { public func __conversion() -> ModelIdentifier { ModelIdentifier( - provider: ._Groq, + provider: ._ElevenLabs, name: rawValue, revision: nil ) diff --git a/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingOptions.swift b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingOptions.swift new file mode 100644 index 00000000..3b7b0373 --- /dev/null +++ 
b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingOptions.swift @@ -0,0 +1,35 @@ +// +// ElevenLabs.DubbingOptions.swift +// AI +// +// Created by Jared Davidson on 1/7/25. +// + +import Foundation + +extension ElevenLabs.Client { + public struct DubbingOptions { + public var watermark: Bool? + public var startTime: Int? + public var endTime: Int? + public var highestResolution: Bool? + public var dropBackgroundAudio: Bool? + public var useProfanityFilter: Bool? + + public init( + watermark: Bool? = nil, + startTime: Int? = nil, + endTime: Int? = nil, + highestResolution: Bool? = nil, + dropBackgroundAudio: Bool? = nil, + useProfanityFilter: Bool? = nil + ) { + self.watermark = watermark + self.startTime = startTime + self.endTime = endTime + self.highestResolution = highestResolution + self.dropBackgroundAudio = dropBackgroundAudio + self.useProfanityFilter = useProfanityFilter + } + } +} diff --git a/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingProgress.swift b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingProgress.swift new file mode 100644 index 00000000..9001e7db --- /dev/null +++ b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingProgress.swift @@ -0,0 +1,16 @@ +// +// ElevenLabs.DubbingProgress.swift +// AI +// +// Created by Jared Davidson on 1/7/25. +// + +import Foundation + +extension ElevenLabs.Client { + public struct DubbingProgress { + public let status: ElevenLabs.APISpecification.ResponseBodies.DubbingStatus + public let expectedDuration: TimeInterval + public let dubbingId: String + } +} diff --git a/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingResult.swift b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingResult.swift new file mode 100644 index 00000000..634a75c2 --- /dev/null +++ b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.DubbingResult.swift @@ -0,0 +1,16 @@ +// +// ElevenLabs.DubbingResult.swift +// AI +// +// Created by Jared Davidson on 1/7/25. 
+// + +import Foundation + +extension ElevenLabs.Client { + public struct DubbingResult { + public let data: Data + public let dubbingId: String + public let totalDuration: TimeInterval + } +} diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.Voice.swift b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.Voice.swift similarity index 63% rename from Sources/ElevenLabs/Intramodular/ElevenLabs.Voice.swift rename to Sources/ElevenLabs/Intramodular/Models/ElevenLabs.Voice.swift index 3a54532f..dbe29d63 100644 --- a/Sources/ElevenLabs/Intramodular/ElevenLabs.Voice.swift +++ b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.Voice.swift @@ -4,6 +4,7 @@ import Foundation import Swift +import LargeLanguageModels extension ElevenLabs { public struct Voice: Hashable, Identifiable, Sendable { @@ -42,3 +43,24 @@ extension ElevenLabs.Voice: Codable { case isOwner } } + +extension ElevenLabs.Voice: AbstractVoiceConvertible { + public func __conversion() throws -> AbstractVoice { + return AbstractVoice( + voiceID: self.voiceID, + name: self.name, + description: self.description + ) + } +} + +extension ElevenLabs.Voice: AbstractVoiceInitiable { + public init(voice: AbstractVoice) throws { + self.init( + voiceID: voice.voiceID, + name: voice.name, + description: voice.description, + isOwner: nil + ) + } +} diff --git a/Sources/ElevenLabs/Intramodular/ElevenLabs.VoiceSettings.swift b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.VoiceSettings.swift similarity index 81% rename from Sources/ElevenLabs/Intramodular/ElevenLabs.VoiceSettings.swift rename to Sources/ElevenLabs/Intramodular/Models/ElevenLabs.VoiceSettings.swift index 1ffb7947..f0a6b825 100644 --- a/Sources/ElevenLabs/Intramodular/ElevenLabs.VoiceSettings.swift +++ b/Sources/ElevenLabs/Intramodular/Models/ElevenLabs.VoiceSettings.swift @@ -3,6 +3,7 @@ // import Foundation +import LargeLanguageModels extension ElevenLabs { public struct VoiceSettings: Codable, Sendable, Hashable { @@ -98,3 +99,29 @@ extension 
ElevenLabs.VoiceSettings { ) } } + +// MARK: - Conformances + +extension ElevenLabs.VoiceSettings: AbstractVoiceSettingsConvertible { + public func __conversion() throws -> AbstractVoiceSettings { + return .init( + stability: stability, + similarityBoost: similarityBoost, + styleExaggeration: styleExaggeration, + speakerBoost: speakerBoost, + removeBackgroundNoise: removeBackgroundNoise + ) + } +} + +extension ElevenLabs.VoiceSettings: AbstractVoiceSettingsInitiable { + public init(settings: AbstractVoiceSettings) throws { + self.init( + stability: settings.stability, + similarityBoost: settings.similarityBoost, + styleExaggeration: settings.styleExaggeration, + speakerBoost: settings.speakerBoost, + removeBackgroundNoise: settings.removeBackgroundNoise + ) + } +} diff --git a/Sources/HumeAI/Intramodular/API/HumeAI.APISpecification.swift b/Sources/HumeAI/Intramodular/API/HumeAI.APISpecification.swift index b78e11a9..cb71af84 100644 --- a/Sources/HumeAI/Intramodular/API/HumeAI.APISpecification.swift +++ b/Sources/HumeAI/Intramodular/API/HumeAI.APISpecification.swift @@ -496,8 +496,6 @@ extension HumeAI.APISpecification { from response: HTTPResponse, context: DecodeOutputContext ) throws -> Output { - - print(response) do { try response.validate() } catch { diff --git a/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift b/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..cd5a4e8f --- /dev/null +++ b/Sources/HumeAI/Intramodular/HumeAI.Client+SpeechSynthesisRequestHandling.swift @@ -0,0 +1,44 @@ +// +// HumeAI+ElevenLabsClientProtocol.swift +// Voice +// +// Created by Jared Davidson on 11/22/24. 
+// + +import Foundation +import SwiftUI +import AVFoundation +import LargeLanguageModels + +extension HumeAI.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + return try await getAllAvailableVoices().map( + { voice in + return AbstractVoice( + voiceID: voice.id, + name: voice.name, + description: nil + ) + }) + } + + public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + throw HumeAI.APIError.unknown(message: "Text to speech not supported") + } + + public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + throw HumeAI.APIError.unknown(message: "Speech to speech not supported") + } + + public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID { + throw HumeAI.APIError.unknown(message: "Voice creation is not supported") + } + + public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool { + throw HumeAI.APIError.unknown(message: "Voice creation is not supported") + } + + public func delete(voice: AbstractVoice.ID) async throws { + throw HumeAI.APIError.unknown(message: "Voice creation is not supported") + } +} diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.Chat.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.Chat.swift index 8259acd0..9f04a260 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.Chat.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.Chat.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. 
// +import Foundation + extension HumeAI { public struct Chat: Codable { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.ChatEvent.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.ChatEvent.swift index 3d17547d..3e45ecf6 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.ChatEvent.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.ChatEvent.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { public struct ChatEvent: Codable { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.ChatGroup.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.ChatGroup.swift index 3037b590..ff0a0433 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.ChatGroup.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.ChatGroup.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { public struct ChatGroup { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.ChatMessage.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.ChatMessage.swift index 1085fbbb..cb091272 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.ChatMessage.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.ChatMessage.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { public struct ChatMessage { public let role: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.ChatResponse.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.ChatResponse.swift index 6353d5b2..2b6f2051 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.ChatResponse.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.ChatResponse.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. 
// +import Foundation + extension HumeAI { public struct ChatResponse { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.Config.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.Config.swift index 6cd8d926..f2debc1e 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.Config.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.Config.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { public struct Config: Codable { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.Dataset.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.Dataset.swift index ab06b510..23e315de 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.Dataset.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.Dataset.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { public struct Dataset: Codable { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.File.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.File.swift index 549e6589..56392e60 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.File.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.File.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { public struct File: Codable { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.FileInput.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.FileInput.swift index b3d57c99..9f2995fa 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.FileInput.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.FileInput.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. 
// +import Foundation + extension HumeAI { public struct FileInput: Codable { public let url: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.Job.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.Job.swift index 5d9587f3..0231784f 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.Job.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.Job.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { // MARK: - Root Response public struct Job: Codable { diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.Models.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.Models.swift index 307c30f4..0c22b653 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.Models.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.Models.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { public struct Model: Codable { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.Prompt.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.Prompt.swift index e3cd0dcd..7cdd85c1 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.Prompt.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.Prompt.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. // +import Foundation + extension HumeAI { public struct Prompt: Codable { public let id: String diff --git a/Sources/HumeAI/Intramodular/Models/HumeAI.Tool.swift b/Sources/HumeAI/Intramodular/Models/HumeAI.Tool.swift index 24cf1df7..c4b6b84f 100644 --- a/Sources/HumeAI/Intramodular/Models/HumeAI.Tool.swift +++ b/Sources/HumeAI/Intramodular/Models/HumeAI.Tool.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/25/24. 
//
// AbstractVoice.swift
// Voice
//
// Created by Jared Davidson on 10/31/24.
//

import CorePersistence
import SwiftUI
import AVFoundation
import UniformTypeIdentifiers

/// A provider-agnostic description of a synthesizable voice.
///
/// The `Identifiable` id is derived from the provider's `voiceID`, so two
/// values with the same provider voice share the same identity.
public struct AbstractVoice: Codable, Hashable, Identifiable, Sendable {
    // NOTE(review): the generic arguments of `_TypeAssociatedID` appear to
    // have been lost in formatting — verify against CorePersistence.
    public typealias ID = _TypeAssociatedID

    public let id: ID
    /// The provider's raw voice identifier.
    public let voiceID: String
    /// Human-readable voice name.
    public let name: String
    /// Optional provider-supplied description.
    public let description: String?

    public init(
        voiceID: String,
        name: String,
        description: String?
    ) {
        self.id = ID(rawValue: voiceID)
        self.voiceID = voiceID
        self.name = name
        self.description = description
    }
}

// MARK: - Conformances

/// A type constructible from an `AbstractVoice`.
public protocol AbstractVoiceInitiable {
    init(voice: AbstractVoice) throws
}

/// A type convertible into an `AbstractVoice`.
public protocol AbstractVoiceConvertible {
    func __conversion() throws -> AbstractVoice
}
//
// AbstractVoiceSettings.swift
// Voice
//
// Created by Jared Davidson on 10/30/24.
//

import SwiftUIX
import CorePersistence

/// Provider-agnostic speech-synthesis settings.
///
/// Every fractional setting is clamped into `0...1` at initialization.
public struct AbstractVoiceSettings: Codable, Sendable, Initiable, Equatable {
    /// Wire-format keys in the provider's (ElevenLabs-style) snake_case.
    /// NOTE(review): this enum has no `removeBackgroundNoise` case and is not
    /// referenced by `encode(to:)` — confirm whether it is still needed.
    public enum Setting: String, Codable, Sendable {
        case stability
        case similarityBoost = "similarity_boost"
        case styleExaggeration = "style"
        case speakerBoost = "use_speaker_boost"
    }

    /// Consistency between re-generations: 0 (more variable) … 1 (more
    /// stable). Lower values are recommended for longer text fragments.
    public var stability: Double

    /// Voice clarity / speaker similarity: 0 (low) … 1 (high). Very high
    /// values can cause artifacts.
    public var similarityBoost: Double

    /// Style exaggeration relative to the selected voice. 0 is fastest to
    /// generate; higher values can make generation less stable.
    public var styleExaggeration: Double

    /// Trades some generation speed for closer speaker similarity.
    public var speakerBoost: Bool

    /// Whether background noise should be removed from the input audio.
    public var removeBackgroundNoise: Bool

    public init() {
        // NOTE(review): this default (1.0) disagrees with the 0.5 fallback
        // used by the optional-parameter initializer — confirm which is
        // intended.
        self.init(stability: 1.0)
    }

    public init(
        stability: Double,
        similarityBoost: Double,
        styleExaggeration: Double,
        speakerBoost: Bool,
        removeBackgroundNoise: Bool
    ) {
        self.stability = Self.clampedToUnitRange(stability)
        self.similarityBoost = Self.clampedToUnitRange(similarityBoost)
        self.styleExaggeration = Self.clampedToUnitRange(styleExaggeration)
        self.speakerBoost = speakerBoost
        self.removeBackgroundNoise = removeBackgroundNoise
    }

    /// Omitted parameters fall back to sensible defaults
    /// (0.5 / 0.75 / 0 / true / false); supplied values are clamped.
    public init(
        stability: Double? = nil,
        similarityBoost: Double? = nil,
        styleExaggeration: Double? = nil,
        speakerBoost: Bool? = nil,
        removeBackgroundNoise: Bool? = nil
    ) {
        self.init(
            stability: stability ?? 0.5,
            similarityBoost: similarityBoost ?? 0.75,
            styleExaggeration: styleExaggeration ?? 0,
            speakerBoost: speakerBoost ?? true,
            removeBackgroundNoise: removeBackgroundNoise ?? false
        )
    }

    public init(stability: Double) {
        self.init(
            stability: stability,
            similarityBoost: 0.75,
            styleExaggeration: 0,
            speakerBoost: true,
            removeBackgroundNoise: false
        )
    }

    public init(similarityBoost: Double) {
        self.init(
            stability: 0.5,
            similarityBoost: similarityBoost,
            styleExaggeration: 0,
            speakerBoost: true,
            removeBackgroundNoise: false
        )
    }

    public init(styleExaggeration: Double) {
        self.init(
            stability: 0.5,
            similarityBoost: 0.75,
            styleExaggeration: styleExaggeration,
            speakerBoost: true,
            removeBackgroundNoise: false
        )
    }

    public init(speakerBoost: Bool) {
        self.init(
            stability: 0.5,
            similarityBoost: 0.75,
            styleExaggeration: 0,
            speakerBoost: speakerBoost,
            removeBackgroundNoise: false
        )
    }

    /// Encodes all five properties under the synthesized (camelCase)
    /// `CodingKeys` — the same output the compiler-synthesized conformance
    /// would produce; kept explicit for clarity.
    public func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)

        try container.encode(stability, forKey: .stability)
        try container.encode(similarityBoost, forKey: .similarityBoost)
        try container.encode(styleExaggeration, forKey: .styleExaggeration)
        try container.encode(speakerBoost, forKey: .speakerBoost)
        try container.encode(removeBackgroundNoise, forKey: .removeBackgroundNoise)
    }

    /// Clamps a fractional setting into `0...1`.
    private static func clampedToUnitRange(_ value: Double) -> Double {
        min(1, max(0, value))
    }
}

/// A type constructible from `AbstractVoiceSettings`.
public protocol AbstractVoiceSettingsInitiable {
    init(settings: AbstractVoiceSettings) throws
}

/// A type convertible into `AbstractVoiceSettings`.
public protocol AbstractVoiceSettingsConvertible {
    func __conversion() throws -> AbstractVoiceSettings
}
//
// SpeechSynthesisRequestHandling.swift
// Voice
//
// Created by Jared Davidson on 10/30/24.
//

import Foundation
import SwiftUI

public protocol SpeechToSpeechRequest {

}

public protocol SpeechToSpeechRequestHandling {

}

/// A client capable of servicing speech-synthesis requests: listing voices,
/// generating audio, and managing custom voices.
public protocol SpeechSynthesisRequestHandling: AnyObject {
    /// All voices offered by this provider.
    func availableVoices() async throws -> [AbstractVoice]

    /// Synthesizes `text` with the given voice/model and returns raw audio.
    func speech(
        for text: String,
        voiceID: String,
        voiceSettings: AbstractVoiceSettings,
        model: String
    ) async throws -> Data

    /// Re-voices existing audio with the given voice and returns raw audio.
    func speechToSpeech(
        inputAudioURL: URL,
        voiceID: String,
        voiceSettings: AbstractVoiceSettings,
        model: String
    ) async throws -> Data

    /// Creates a custom voice from an audio file; returns its new ID.
    func upload(
        voiceWithName name: String,
        description: String,
        fileURL: URL
    ) async throws -> AbstractVoice.ID

    /// Updates a custom voice; returns whether the edit succeeded.
    func edit(
        voice: AbstractVoice.ID,
        name: String,
        description: String,
        fileURL: URL?
    ) async throws -> Bool

    /// Removes a custom voice.
    func delete(voice: AbstractVoice.ID) async throws
}

// MARK: - Environment Key

private struct AbstractClientKey: EnvironmentKey {
    static let defaultValue: (any SpeechSynthesisRequestHandling)? = nil
}

extension EnvironmentValues {
    /// The speech synthesizer injected into the SwiftUI environment, if any.
    public var speechSynthesizer: (any SpeechSynthesisRequestHandling)? {
        get { self[AbstractClientKey.self] }
        set { self[AbstractClientKey.self] = newValue }
    }
}

//
// Copyright (c) Preternatural AI, Inc.
//

import AVFoundation
import CoreMI
import Foundation
import SwiftUI

/// A client capable of generating video from text, images, or other video.
public protocol VideoGenerationRequestHandling: CoreMI.RequestHandling {
    /// All video models offered by this provider.
    func availableModels() async throws -> [VideoModel]

    /// Generates video from a text prompt; returns raw video data.
    func textToVideo(
        text: String,
        model: VideoModel,
        settings: VideoGenerationSettings
    ) async throws -> Data

    /// Generates video from a source image; returns raw video data.
    func imageToVideo(
        imageURL: URL,
        model: VideoModel,
        settings: VideoGenerationSettings
    ) async throws -> Data

    /// Transforms a source video guided by `prompt`; returns raw video data.
    func videoToVideo(
        videoURL: URL,
        prompt: String,
        model: VideoModel,
        settings: VideoGenerationSettings
    ) async throws -> Data
}

private struct VideoGeneratorKey: EnvironmentKey {
    public static let defaultValue: (any VideoGenerationRequestHandling)? = nil
}

extension EnvironmentValues {
    /// The video-generation client injected into the environment, if any.
    public var videoClient: (any VideoGenerationRequestHandling)? {
        get { self[VideoGeneratorKey.self] }
        set { self[VideoGeneratorKey.self] = newValue }
    }
}

/// A type-erasing, hashable wrapper around a video-generation client.
///
/// Identity (equality/hash) is keyed off the wrapped instance's object
/// identity, captured once at initialization.
public struct AnyVideoGenerationRequestHandling: Hashable {
    public let base: any CoreMI._ServiceClientProtocol & VideoGenerationRequestHandling
    private let _hashValue: Int

//    var displayName: String {
//        switch base {
//        case is FalVideoGenerationRequestHandling:
//            return "Fal"
//        default:
//            fatalError()
//        }
//    }

    public init(
        _ base: any CoreMI._ServiceClientProtocol & VideoGenerationRequestHandling
    ) {
        self.base = base
        // FIXME(review): `base as AnyObject` boxes value types into a fresh
        // object each time, so this identity is only stable when `base` is a
        // class. `VideoGenerationRequestHandling` (unlike
        // `SpeechSynthesisRequestHandling`) is not `AnyObject`-constrained —
        // confirm all conformers are classes or add the constraint.
        self._hashValue = ObjectIdentifier(base as AnyObject).hashValue
    }

    public static func == (lhs: Self, rhs: Self) -> Bool {
        lhs._hashValue == rhs._hashValue
    }

    public func hash(into hasher: inout Hasher) {
        hasher.combine(_hashValue)
    }
}
//
// Copyright (c) Preternatural AI, Inc.
//

import Foundation

extension VideoGenerationSettings {
    /// Supported output frame rates, in frames per second.
    public enum FrameRate: Int, Codable, CaseIterable {
        case fps8 = 8
        case fps16 = 16
        case fps24 = 24
        case fps30 = 30

        /// The frame rate as a plain integer (identical to `rawValue`).
        public var fps: Int { rawValue }
    }
}

//
// Copyright (c) Preternatural AI, Inc.
//

import Foundation

extension VideoGenerationSettings {
    /// Controls how much — and how stably — the generated video moves.
    public struct MotionSettings: Codable, Hashable {
        /// Whether to stabilize camera motion.
        public var stabilize: Bool
        /// Motion intensity; clamped to `0...127` at initialization.
        public var motionBucketId: Int
        /// Conditioning augmentation; clamped to `0.01...0.1` at initialization.
        public var conditioningAugmentation: Double

        public init(
            stabilize: Bool = true,
            motionBucketId: Int = 127,
            conditioningAugmentation: Double = 0.02
        ) {
            self.stabilize = stabilize
            self.motionBucketId = min(127, max(0, motionBucketId))
            self.conditioningAugmentation = min(0.1, max(0.01, conditioningAugmentation))
        }
    }
}
//
// Copyright (c) Preternatural AI, Inc.
//

import Foundation

extension VideoGenerationSettings {
    /// Quality presets, each mapping to a diffusion inference-step count.
    public enum Quality: String, Codable, CaseIterable {
        case draft          // 20 steps
        case fast           // 30 steps
        case balanced       // 35 steps
        case quality        // 40 steps
        case max            // 50 steps

        /// The number of inference steps to run for this preset.
        public var inferenceSteps: Int {
            switch self {
            case .draft:
                return 20
            case .fast:
                return 30
            case .balanced:
                return 35
            case .quality:
                return 40
            case .max:
                return 50
            }
        }

        /// The step count normalized into `0...1` over the 20–50 range
        /// (`.draft` → 0.0, `.max` → 1.0).
        public var qualityValue: Double {
            Double(inferenceSteps - 20) / 30
        }
    }
}
//
// Copyright (c) Preternatural AI, Inc.
//

import Foundation

extension VideoGenerationSettings {
    /// Output resolution, expressed either as a named preset or a custom size.
    public enum Resolution: Codable, Hashable {
        // Square Resolutions
        case sd512x512
        case sd768x768
        case sd1024x1024

        // Landscape HD Resolutions
        case hd720p     // 1280x720
        case hd1080p    // 1920x1080
        case hd1440p    // 2560x1440
        case uhd4k      // 3840x2160

        // Social Media Formats
        case instagram  // 1080x1080
        case story      // 1080x1920
        case tiktok     // 1080x1920
        case youtube    // 1920x1080

        // Custom Resolution
        case custom(width: Int, height: Int)

        /// All named presets. (Hand-maintained because `custom` prevents a
        /// synthesized `CaseIterable` conformance.)
        public static var allCases: [Resolution] {
            [
                .sd512x512, .sd768x768, .sd1024x1024,
                .hd720p, .hd1080p, .hd1440p, .uhd4k,
                .instagram, .story, .tiktok, .youtube
            ]
        }

        /// Pixel dimensions for this resolution.
        public var dimensions: (width: Int, height: Int) {
            switch self {
            case .sd512x512:
                return (512, 512)
            case .sd768x768:
                return (768, 768)
            case .sd1024x1024, .instagram:
                return self == .instagram ? (1080, 1080) : (1024, 1024)
            case .hd720p:
                return (1280, 720)
            case .hd1080p, .youtube:
                return (1920, 1080)
            case .hd1440p:
                return (2560, 1440)
            case .uhd4k:
                return (3840, 2160)
            case .story, .tiktok:
                return (1080, 1920)
            case .custom(let width, let height):
                return (width, height)
            }
        }

        public var width: Int { dimensions.width }
        public var height: Int { dimensions.height }

        /// The aspect ratio in simplified `W:H` form, with names preserved
        /// for the common ratios.
        public var aspectRatio: String {
            let divisor = greatestCommonDivisor(width, height)
            let simplifiedWidth = width / divisor
            let simplifiedHeight = height / divisor

            switch (simplifiedWidth, simplifiedHeight) {
            case (1, 1): return "1:1"       // Square
            case (16, 9): return "16:9"     // Standard Widescreen
            case (9, 16): return "9:16"     // Vertical/Portrait
            case (4, 3): return "4:3"       // Traditional TV
            case (21, 9): return "21:9"     // Ultrawide
            default: return "\(simplifiedWidth):\(simplifiedHeight)"
            }
        }

        /// A marketing-style resolution label ("4K", "1080p", …). Custom
        /// sizes are bucketed by width.
        public var resolution: String {
            switch self {
            case .uhd4k:
                return "4K"
            case .hd1440p:
                return "1440p"
            case .hd1080p, .youtube, .instagram, .story, .tiktok:
                return "1080p"
            case .hd720p:
                return "720p"
            case .sd512x512:
                return "512p"
            case .sd768x768:
                return "768p"
            case .sd1024x1024:
                return "1024p"
            case .custom(let width, _):
                if width >= 3840 { return "4K" }
                if width >= 2560 { return "1440p" }
                if width >= 1920 { return "1080p" }
                if width >= 1280 { return "720p" }
                return "\(width)p"
            }
        }

        /// Maps raw pixel dimensions back onto a named preset where one
        /// exists, otherwise `.custom`.
        ///
        /// NOTE(review): ambiguous sizes resolve to a single preset —
        /// (1080, 1920) → `.story` (never `.tiktok`) and (1920, 1080) →
        /// `.hd1080p` (never `.youtube`).
        public static func detectResolution(width: Int, height: Int) -> Resolution {
            switch (width, height) {
            case (512, 512): return .sd512x512
            case (768, 768): return .sd768x768
            case (1024, 1024): return .sd1024x1024
            case (1280, 720): return .hd720p
            case (1920, 1080): return .hd1080p
            case (2560, 1440): return .hd1440p
            case (3840, 2160): return .uhd4k
            case (1080, 1080): return .instagram
            case (1080, 1920): return .story
            default: return .custom(width: width, height: height)
            }
        }

        /// Iterative Euclidean algorithm.
        private func greatestCommonDivisor(_ a: Int, _ b: Int) -> Int {
            var a = a
            var b = b
            while b != 0 {
                (a, b) = (b, a % b)
            }

            return a
        }

        /// A human-readable name suitable for UI display.
        public var displayName: String {
            switch self {
            case .sd512x512: return "512×512"
            case .sd768x768: return "768×768"
            case .sd1024x1024: return "1024×1024"
            case .hd720p: return "HD 720p"
            case .hd1080p: return "Full HD 1080p"
            case .hd1440p: return "QHD 1440p"
            case .uhd4k: return "4K UHD"
            case .instagram: return "Instagram Square"
            case .story: return "Instagram/TikTok Story"
            case .tiktok: return "TikTok Video"
            case .youtube: return "YouTube HD"
            case .custom(let width, let height):
                return "\(width)×\(height)"
            }
        }
    }
}
//
// Copyright (c) Preternatural AI, Inc.
//

import Foundation

extension VideoGenerationSettings {
    /// How strongly the prompt steers generation (classifier-free guidance).
    public enum StyleStrength: String, Codable, CaseIterable {
        case subtle     // guidance ~1-5
        case balanced   // guidance ~5-10
        case strong     // guidance ~10-15
        case extreme    // guidance ~15-20

        /// The guidance scale used for this preset.
        public var guidanceScale: Double {
            switch self {
            case .subtle:
                return 3.0
            case .balanced:
                return 7.5
            case .strong:
                return 12.5
            case .extreme:
                return 17.5
            }
        }

        /// The guidance scale normalized into `0...1` over the 1–20 range.
        public var strengthValue: Double {
            (guidanceScale - 1) / 19
        }
    }
}
//
// Copyright (c) Preternatural AI, Inc.
//

import Foundation

/// Settings controlling a single video-generation request.
public struct VideoGenerationSettings: Codable, Hashable, Equatable {
    /// Duration of the generated video in seconds, clamped to `1...60`
    /// both at initialization and on every subsequent mutation.
    public var duration: Double {
        didSet {
            duration = max(1, min(60, duration))
        }
    }

    public var resolution: Resolution
    public var frameRate: FrameRate
    public var quality: Quality
    public var styleStrength: StyleStrength
    public var motion: MotionSettings
    public var negativePrompt: String

    // Convenience passthroughs to the nested presets.
    public var fps: Int { frameRate.fps }
    public var numInferenceSteps: Int { quality.inferenceSteps }
    public var guidanceScale: Double { styleStrength.guidanceScale }

    public init(
        duration: Double = 10.0,
        resolution: Resolution = .sd512x512,
        frameRate: FrameRate = .fps24,
        quality: Quality = .balanced,
        styleStrength: StyleStrength = .balanced,
        motion: MotionSettings = MotionSettings(),
        negativePrompt: String = ""
    ) {
        self.duration = max(1, min(60, duration))
        self.resolution = resolution
        self.frameRate = frameRate
        self.quality = quality
        self.styleStrength = styleStrength
        self.motion = motion
        self.negativePrompt = negativePrompt
    }
}

//
// Copyright (c) Preternatural AI, Inc.
//

import CorePersistence
import Foundation

/// A video-generation model exposed by a provider endpoint.
public struct VideoModel: Codable, Hashable, Identifiable {
    // NOTE(review): the generic arguments of `_TypeAssociatedID` appear to
    // have been lost in formatting — verify against CorePersistence.
    public typealias ID = _TypeAssociatedID

    public let id: ID
    /// Provider endpoint that serves this model.
    public let endpoint: String
    public let name: String
    public let description: String?
    public let capabilities: [Capability]

    public enum Capability: String, Codable {
        case textToVideo
        case imageToVideo
        case videoToVideo
    }

    public init(
        endpoint: String,
        name: String,
        description: String?,
        capabilities: [Capability]
    ) {
        // NOTE(review): a fresh random ID on every init means two models
        // built from identical parameters never compare equal under
        // `Hashable` — confirm this is intended.
        self.id = .random()
        self.endpoint = endpoint
        self.name = name
        self.description = description
        self.capabilities = capabilities
    }
}

extension NeetsAI.Voice: AbstractVoiceConvertible {
    /// Maps this Neets voice onto the provider-agnostic representation.
    /// A missing `title` becomes an empty name (callers may filter these out).
    public func __conversion() throws -> AbstractVoice {
        AbstractVoice(
            voiceID: id,
            name: title ?? "",
            description: aliasOf
        )
    }
}

extension NeetsAI.Voice: AbstractVoiceInitiable {
    /// Builds a Neets voice from the provider-agnostic representation.
    /// `supportedModels` is unknown at this point and left empty.
    public init(voice: AbstractVoice) throws {
        self.init(
            id: .init(voice.voiceID),
            title: voice.name,
            aliasOf: voice.description,
            supportedModels: []
        )
    }
}

//
// NeetsAI.Client+SpeechSynthesisRequestHandling.swift
// Voice
//

import Foundation
import SwiftUI
import AVFoundation
import LargeLanguageModels

extension NeetsAI.Client: SpeechSynthesisRequestHandling {
    /// All Neets voices with a non-empty name, de-duplicated by name
    /// (first occurrence wins).
    public func availableVoices() async throws -> [AbstractVoice] {
        try await getAllAvailableVoices()
            .map { try $0.__conversion() }
            .filter { !$0.name.isEmpty }
            .unique(by: \.name)
    }

    /// Synthesizes `text` via the Neets TTS endpoint.
    /// NOTE(review): unrecognized model identifiers silently fall back to
    /// `.mistralai` — confirm this default is intended.
    public func speech(for text: String, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data {
        try await generateSpeech(
            text: text,
            voiceId: voiceID,
            model: .init(rawValue: model) ?? .mistralai
        )
    }

    /// Unsupported by the Neets client; always throws.
    public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: LargeLanguageModels.AbstractVoiceSettings, model: String) async throws -> Data {
        throw NeetsAI.APIError.unknown(message: "Speech to speech not supported")
    }

    /// Unsupported by the Neets client; always throws.
    public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> LargeLanguageModels.AbstractVoice.ID {
        throw NeetsAI.APIError.unknown(message: "Uploading Voice is not supported")
    }

    /// Unsupported by the Neets client; always throws.
    public func edit(voice: LargeLanguageModels.AbstractVoice.ID, name: String, description: String, fileURL: URL?) async throws -> Bool {
        throw NeetsAI.APIError.unknown(message: "Editing Voice is not supported")
    }

    /// Unsupported by the Neets client; always throws.
    public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws {
        throw NeetsAI.APIError.unknown(message: "Deleting Voice is not supported")
    }
}

// FIXME: - REMOVE ME
extension Sequence {
    /// Keeps the first element seen for each distinct value of `keyPath`,
    /// preserving order.
    /// NOTE(review): the generic parameters were lost in formatting and have
    /// been restored here — verify against the original declaration.
    func unique<Value: Hashable>(by keyPath: KeyPath<Element, Value>) -> [Element] {
        var seen: Set<Value> = []
        return filter { seen.insert($0[keyPath: keyPath]).inserted }
    }
}
) async throws { diff --git a/Sources/OpenAI/Intramodular/API Client/OpenAI.Client-Image.swift b/Sources/OpenAI/Intramodular/API Client/OpenAI.Client-Image.swift index 23124b55..7a8b3ca1 100644 --- a/Sources/OpenAI/Intramodular/API Client/OpenAI.Client-Image.swift +++ b/Sources/OpenAI/Intramodular/API Client/OpenAI.Client-Image.swift @@ -3,6 +3,7 @@ // import CorePersistence +import SwiftUIX import Swallow extension OpenAI.Client { @@ -40,4 +41,18 @@ extension OpenAI.Client { return response } + + public func createImageEdit( + image: Data, + prompt: String + ) async throws -> OpenAI.List { + let requestBody = OpenAI.APISpecification.RequestBodies.CreateImageEdit( + image: image, + prompt: prompt + ) + + let response = try await run(\.createImageEdit, with: requestBody) + + return response + } } diff --git a/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.RequestBodies.swift b/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.RequestBodies.swift index f0e988bf..b925fe3f 100644 --- a/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.RequestBodies.swift +++ b/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.RequestBodies.swift @@ -12,7 +12,7 @@ extension OpenAI.APISpecification { } extension OpenAI.APISpecification.RequestBodies { - struct CreateCompletion: Codable, Hashable { + public struct CreateCompletion: Codable, Hashable { let prompt: Either let model: OpenAI.Model let suffix: String? @@ -620,6 +620,120 @@ extension OpenAI.APISpecification.RequestBodies { } } +extension OpenAI.APISpecification.RequestBodies { + struct CreateImageEdit: Codable, HTTPRequest.Multipart.ContentConvertible { + enum CodingKeys: String, CodingKey { + case image + case prompt + case mask + case model + case numberOfImages = "n" + case size + case responseFormat = "response_format" + case user + } + + let image: Data + let prompt: String + let mask: Data? 
+ let model: OpenAI.Model.DALL_E + let numberOfImages: Int + let size: OpenAI.Image.Size + let responseFormat: OpenAI.Client.ImageResponseFormat + let user: String? + + init( + image: Data, + prompt: String, + mask: Data? = nil, + model: OpenAI.Model.DALL_E = .dalle2, + numberOfImages: Int = 1, + size: OpenAI.Image.Size = .w1024h1024, + responseFormat: OpenAI.Client.ImageResponseFormat = .ephemeralURL, + user: String? = nil + ) { + self.image = image + self.prompt = prompt + self.mask = mask + self.model = model + self.numberOfImages = numberOfImages + self.size = size + self.responseFormat = responseFormat + self.user = user + } + + func __conversion() -> HTTPRequest.Multipart.Content { + var result = HTTPRequest.Multipart.Content() + + result.append( + .file( + named: "image", + data: image, + filename: "image.png", + contentType: .custom("image/png") + ) + ) + + result.append( + .text( + named: "prompt", + value: prompt + ) + ) + + if let mask = mask { + result.append( + .file( + named: "mask", + data: mask, + filename: "mask.png", + contentType: .custom("image/png") + ) + ) + } + + result.append( + .text( + named: "model", + value: model.rawValue + ) + ) + + result.append( + .text( + named: "n", + value: String(numberOfImages) + ) + ) + + result.append( + .text( + named: "size", + value: size.rawValue + ) + ) + + result.append( + .text( + named: "response_format", + value: responseFormat.rawValue + ) + ) + + if let user = user { + result.append( + .text( + named: "user", + value: user + ) + ) + } + + return result + } + } +} + extension OpenAI.APISpecification.RequestBodies { struct CreateVectorStore: Codable { enum CodingKeys: String, CodingKey { @@ -793,7 +907,7 @@ extension OpenAI.APISpecification.RequestBodies { /// The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. let seed: Int? 
} - + } extension OpenAI.APISpecification.RequestBodies { diff --git a/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.ResponseBodies.swift b/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.ResponseBodies.swift index 640f4114..5ef07087 100644 --- a/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.ResponseBodies.swift +++ b/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.ResponseBodies.swift @@ -42,7 +42,7 @@ extension OpenAI.APISpecification.ResponseBodies { } } - struct CreateChatCompletion: Codable, Hashable, Sendable { + public struct CreateChatCompletion: Codable, Hashable, Sendable { public let message: OpenAI.ChatMessage } diff --git a/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.swift b/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.swift index 022f7506..3e62f991 100644 --- a/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.swift +++ b/Sources/OpenAI/Intramodular/API Specification/OpenAI.APISpecification.swift @@ -203,6 +203,11 @@ extension OpenAI { @Body(json: .input, keyEncodingStrategy: .convertToSnakeCase) var createImage = Endpoint, Void>() + @POST + @Path("/v1/images/edits") + @Body(multipart: .input) + var createImageEdit = Endpoint, Void>() + // Vector Store @Header(["OpenAI-Beta": "assistants=v2"]) @POST diff --git a/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift b/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift index 23505a92..e50eeff4 100644 --- a/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift +++ b/Sources/OpenAI/Intramodular/Models/OpenAI.Model.swift @@ -405,6 +405,7 @@ extension OpenAI.Model { } case dalle3 = "dall-e-3" + case dalle2 = "dall-e-2" public var contextSize: Int? 
{ 4000 @@ -414,6 +415,8 @@ extension OpenAI.Model { switch self { case .dalle3: "dall-e-3" + case .dalle2: + "dall-e-2" } } } diff --git a/Sources/Perplexity/Intramodular/Perplexity+LLMRequestHandling.swift b/Sources/Perplexity/Intramodular/Perplexity+LLMRequestHandling.swift index 7ecf63ba..2aa90bfd 100644 --- a/Sources/Perplexity/Intramodular/Perplexity+LLMRequestHandling.swift +++ b/Sources/Perplexity/Intramodular/Perplexity+LLMRequestHandling.swift @@ -70,7 +70,6 @@ extension Perplexity.Client: LLMRequestHandling { topP: parameters.temperatureOrTopP?.topProbabilityMass, topK: nil, maxTokens: parameters.tokenLimit?.fixedValue, - returnCitations: nil, returnImages: nil, stream: false, presencePenalty: nil, diff --git a/Sources/Perplexity/Intramodular/Perplexity.APISpecification/Perplexity.APISpecification.RequestBodies.swift b/Sources/Perplexity/Intramodular/Perplexity.APISpecification/Perplexity.APISpecification.RequestBodies.swift index e8104870..2f689abc 100644 --- a/Sources/Perplexity/Intramodular/Perplexity.APISpecification/Perplexity.APISpecification.RequestBodies.swift +++ b/Sources/Perplexity/Intramodular/Perplexity.APISpecification/Perplexity.APISpecification.RequestBodies.swift @@ -25,12 +25,18 @@ extension Perplexity.APISpecification.RequestBodies { /// The maximum number of completion tokens returned by the API. The total number of tokens requested in max_tokens plus the number of prompt tokens sent in messages must not exceed the context window token limit of model requested. If left unspecified, then the model will generate tokens until either it reaches its stop token or the end of its context window. public var maxTokens: Int? - /// Determines whether or not a request to an online model should return citations. Citations are in closed beta access. To gain access, apply at https://perplexity.typeform.com/to/j50rnNiB - public var returnCitations: Bool? 
+ /// Given a list of domains, limit the citations used by the online model to URLs from the specified domains. Currently limited to only 3 domains for whitelisting and blacklisting. + public var searchDomainFilter: [String]? - /// Determines whether or not a request to an online model should return images. Images are in closed beta access. To gain access, apply at https://perplexity.typeform.com/to/j50rnNiB + /// Determines whether or not a request to an online model should return images. Images are in closed beta. public var returnImages: Bool? + /// Determines whether or not a request to an online model should return related questions. Related questions are in closed beta. + public var returnRelatedQuestions: Bool? + + /// Returns search results within the specified time interval - does not apply to images. Values include `month`, `week`, `day`, `hour`. + public var searchRecencyFilter: String? + /// Determines whether or not to incrementally stream the response with server-sent events with content-type: text/event-stream. public var stream: Bool? @@ -39,5 +45,35 @@ extension Perplexity.APISpecification.RequestBodies { /// A multiplicative penalty greater than 0. Values greater than 1.0 penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. A value of 1.0 means no penalty. Incompatible with presence_penalty. public var frequencyPenalty: Double? + + public init( + model: Perplexity.Model, + messages: [Perplexity.ChatMessage], + temperature: Double? = nil, + topP: Double? = nil, + topK: Int? = nil, + maxTokens: Int? = nil, + searchDomainFilter: [String]? = nil, + returnImages: Bool? = nil, + returnRelatedQuestions: Bool? = nil, + searchRecencyFilter: String? = nil, + stream: Bool? = nil, + presencePenalty: Double? = nil, + frequencyPenalty: Double? 
= nil + ) { + self.model = model + self.messages = messages + self.temperature = temperature + self.topP = topP + self.topK = topK + self.maxTokens = maxTokens + self.searchDomainFilter = searchDomainFilter + self.returnImages = returnImages + self.returnRelatedQuestions = returnRelatedQuestions + self.searchRecencyFilter = searchRecencyFilter + self.stream = stream + self.presencePenalty = presencePenalty + self.frequencyPenalty = frequencyPenalty + } } } diff --git a/Sources/Perplexity/Intramodular/Perplexity.APISpecification/Perplexity.APISpecification.ResponseBodies.swift b/Sources/Perplexity/Intramodular/Perplexity.APISpecification/Perplexity.APISpecification.ResponseBodies.swift index c96e4581..566d3145 100644 --- a/Sources/Perplexity/Intramodular/Perplexity.APISpecification/Perplexity.APISpecification.ResponseBodies.swift +++ b/Sources/Perplexity/Intramodular/Perplexity.APISpecification/Perplexity.APISpecification.ResponseBodies.swift @@ -36,6 +36,7 @@ extension Perplexity.APISpecification.ResponseBodies { public var object: String public var created: Date public var model: Perplexity.Model + public var citations: [String] public var choices: [Choice] public let usage: Usage } diff --git a/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.RequestBodies.swift b/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.RequestBodies.swift index 79ddf26e..563b91c1 100644 --- a/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.RequestBodies.swift +++ b/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.RequestBodies.swift @@ -15,7 +15,7 @@ extension PlayHT.APISpecification { public let text: String public let voice: String public let voiceEngine: PlayHT.Model - public let quality: String +// public let quality: String public let outputFormat: String // public let speed: Double? @@ -26,10 +26,11 @@ extension PlayHT.APISpecification { // public let voiceGuidance: Double? // public let styleGuidance: Double? // public let textGuidance: Double? 
- // public let language: String? + public let language: String? // private enum CodingKeys: String, CodingKey { - case text, voice, quality + case text, voice +// case quality case voiceEngine = "voice_engine" case outputFormat = "output_format" // case speed @@ -38,15 +39,15 @@ extension PlayHT.APISpecification { // case voiceGuidance = "voice_guidance" // case styleGuidance = "style_guidance" // case textGuidance = "text_guidance" - // case language + case language } public init( text: String, voice: String, voiceEngine: PlayHT.Model = .playHT2, - quality: String = "medium", - outputFormat: String = "mp3" +// quality: String = "medium", + outputFormat: String = "mp3", // speed: Double? = nil, // sampleRate: Int? = 48000, // seed: Int? = nil, @@ -55,12 +56,12 @@ extension PlayHT.APISpecification { // voiceGuidance: Double? = nil, // styleGuidance: Double? = nil, // textGuidance: Double? = nil, - // language: String? = nil + language: String? = nil ) { self.text = text self.voice = voice self.voiceEngine = voiceEngine - self.quality = quality +// self.quality = quality self.outputFormat = outputFormat // self.speed = speed // self.sampleRate = sampleRate @@ -70,7 +71,7 @@ extension PlayHT.APISpecification { // self.voiceGuidance = voiceGuidance // self.styleGuidance = styleGuidance // self.textGuidance = textGuidance - // self.language = language + self.language = language } } diff --git a/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.swift b/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.swift index 525ef06d..bbe1e997 100644 --- a/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.swift +++ b/Sources/PlayHT/Intramodular/API/PlayHT.APISpecification.swift @@ -71,7 +71,7 @@ extension PlayHT { @POST @Path("/tts/stream") @Body(json: \.input, keyEncodingStrategy: .convertToSnakeCase) - var streamTextToSpeech = Endpoint() + var streamTextToSpeech = Endpoint() @GET @Path("/cloned-voices") @@ -107,10 +107,10 @@ extension PlayHT.APISpecification { 
from: input, context: context ) - + request = request .header("X-USER-ID", context.root.configuration.userId) - .header("accept", "application/json") + .header(.accept(.mpeg)) .header("AUTHORIZATION", context.root.configuration.apiKey) .header(.contentType(.json)) @@ -122,6 +122,7 @@ extension PlayHT.APISpecification { context: DecodeOutputContext ) throws -> Output { do { + dump(response) try response.validate() } catch { let apiError: Error diff --git a/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift b/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift index 3ac8907f..7074fa2e 100644 --- a/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift +++ b/Sources/PlayHT/Intramodular/Models/PlayHT.Voice.swift @@ -7,6 +7,7 @@ import Foundation import Swallow +import LargeLanguageModels extension PlayHT { public struct Voice: Codable, Hashable, Identifiable { @@ -16,7 +17,7 @@ extension PlayHT { public let name: String public let language: String? public let languageCode: String? - public let voiceEngine: String + public let voiceEngine: String? public let isCloned: Bool? public let gender: String? public let accent: String? @@ -26,11 +27,65 @@ extension PlayHT { public let texture: String? public let loudness: String? public let tempo: String? + + + init( + id: ID, + name: String, + language: String? = nil, + languageCode: String? = nil, + voiceEngine: String? = nil, + isCloned: Bool? = nil, + gender: String? = nil, + accent: String? = nil, + age: String? = nil, + style: String? = nil, + sample: String? = nil, + texture: String? = nil, + loudness: String? = nil, + tempo: String? 
= nil + ) { + self.id = id + self.name = name + self.language = language + self.languageCode = languageCode + self.voiceEngine = voiceEngine + self.isCloned = isCloned + self.gender = gender + self.accent = accent + self.age = age + self.style = style + self.sample = sample + self.texture = texture + self.loudness = loudness + self.tempo = tempo + } private enum CodingKeys: String, CodingKey { case id, name, language, languageCode, voiceEngine, isCloned case gender, accent, age, style, sample, texture, loudness, tempo } + + public init( + id: String, + name: String, + language: String + ) { + self.id = .init(rawValue: id) + self.name = name + self.language = language + self.languageCode = nil + self.voiceEngine = "" + self.isCloned = nil + self.gender = nil + self.accent = nil + self.age = nil + self.style = nil + self.sample = nil + self.texture = nil + self.loudness = nil + self.tempo = nil + } // Add custom decoding if needed to handle any special cases public init(from decoder: Decoder) throws { @@ -72,3 +127,24 @@ extension PlayHT { case flac = "flac" } } + +// MARK: - Conformances + +extension PlayHT.Voice: AbstractVoiceConvertible { + public func __conversion() throws -> AbstractVoice { + return AbstractVoice( + voiceID: self.id.rawValue, + name: self.name, + description: nil + ) + } +} + +extension PlayHT.Voice: AbstractVoiceInitiable { + public init(voice: AbstractVoice) throws { + self.init( + id: .init(rawValue: voice.id.rawValue), + name: voice.name + ) + } +} diff --git a/Sources/PlayHT/Intramodular/Models/PlayHT.VoiceSettings.swift b/Sources/PlayHT/Intramodular/Models/PlayHT.VoiceSettings.swift index ea90cdeb..e676b451 100644 --- a/Sources/PlayHT/Intramodular/Models/PlayHT.VoiceSettings.swift +++ b/Sources/PlayHT/Intramodular/Models/PlayHT.VoiceSettings.swift @@ -5,6 +5,8 @@ // Created by Jared Davidson on 11/20/24. 
// +import Foundation + extension PlayHT { public struct VoiceSettings: Codable, Hashable { public var speed: Double diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift b/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..5ad471cd --- /dev/null +++ b/Sources/PlayHT/Intramodular/PlayHT.Client+SpeechSynthesisRequestHandling.swift @@ -0,0 +1,73 @@ +// +// PlayHT+SpeechSynthesisRequestHandling.swift +// Voice +// +// Created by Jared Davidson on 11/20/24. +// + +import Foundation +import SwiftUI +import AVFoundation +import LargeLanguageModels + +extension PlayHT.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + let voices: [AbstractVoice] = try await getAllAvailableVoices().map { try $0.__conversion() } + return voices + } + + public func speech( + for text: String, + voiceID: String, + voiceSettings: AbstractVoiceSettings, + model: String + ) async throws -> Data { + let data: Data = try await streamTextToSpeech( + text: text, + voice: voiceID, + settings: .init(), + model: .playHT2Turbo + ) + + return data + } + + public func speechToSpeech( + inputAudioURL: URL, + voiceID: String, + voiceSettings: LargeLanguageModels.AbstractVoiceSettings, + model: String + ) async throws -> Data { + throw PlayHT.APIError.unknown(message: "Speech to speech not supported") + } + + public func upload( + voiceWithName name: String, + description: String, + fileURL: URL + ) async throws -> AbstractVoice.ID { + let mp4URL = try await fileURL.convertAudioToMP4() + let fileURLString = mp4URL.absoluteString + let voiceID = try await instantCloneVoice( + sampleFileURL: fileURLString, + name: name + ) + + try? FileManager.default.removeItem(at: mp4URL) + + return .init(rawValue: voiceID.rawValue) + } + + public func edit( + voice: LargeLanguageModels.AbstractVoice.ID, + name: String, + description: String, + fileURL: URL? 
+ ) async throws -> Bool { + throw PlayHT.APIError.unknown(message: "Voice editing not supported") + } + + public func delete(voice: LargeLanguageModels.AbstractVoice.ID) async throws { + try await deleteClonedVoice(voice: .init(rawValue: voice.rawValue)) + } +} diff --git a/Sources/PlayHT/Intramodular/PlayHT.Client.swift b/Sources/PlayHT/Intramodular/PlayHT.Client.swift index 66e6e80f..ac882f20 100644 --- a/Sources/PlayHT/Intramodular/PlayHT.Client.swift +++ b/Sources/PlayHT/Intramodular/PlayHT.Client.swift @@ -57,16 +57,16 @@ extension PlayHT.Client: CoreMI._ServiceClientProtocol { } extension PlayHT.Client { - public func getAllAvailableVoices() async throws -> [PlayHT.Voice] { - async let htVoices = availableVoices() - async let clonedVoices = clonedVoices() + async let htVoices = self.getAvailableVoices() + async let clonedVoices = self.clonedVoices() - let (available, cloned) = try await (htVoices, clonedVoices) - return available + cloned + let (_, cloned) = try await (htVoices, clonedVoices) + + return cloned } - public func availableVoices() async throws -> [PlayHT.Voice] { + public func getAvailableVoices() async throws -> [PlayHT.Voice] { try await run(\.listVoices).voices } @@ -86,33 +86,13 @@ extension PlayHT.Client { text: text, voice: voice, voiceEngine: model, - quality: outputSettings.quality.rawValue, +// quality: outputSettings.quality.rawValue, outputFormat: outputSettings.format.rawValue ) let responseData = try await run(\.streamTextToSpeech, with: input) - - guard let url = URL(string: responseData.href) else { - throw PlayHTError.invalidURL - } - - var request = URLRequest(url: url) - request.httpMethod = "GET" - request.addValue("application/json", forHTTPHeaderField: "Content-Type") - request.addValue(interface.configuration.userId ?? "", forHTTPHeaderField: "X-USER-ID") - request.addValue(interface.configuration.apiKey ?? 
"", forHTTPHeaderField: "AUTHORIZATION") - - let (audioData, response) = try await URLSession.shared.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else { - throw PlayHTError.audioFetchFailed - } - - guard !audioData.isEmpty else { - throw PlayHTError.audioFetchFailed - } - - return audioData + + return responseData } diff --git a/Sources/PlayHT/Intramodular/PlayHT.Model.swift b/Sources/PlayHT/Intramodular/PlayHT.Model.swift index 9eeb0a28..20579363 100644 --- a/Sources/PlayHT/Intramodular/PlayHT.Model.swift +++ b/Sources/PlayHT/Intramodular/PlayHT.Model.swift @@ -18,6 +18,10 @@ extension PlayHT { case playHT1 = "PlayHT1.0" case playHT2Turbo = "PlayHT2.0-turbo" + + case play3_0Mini = "Play3.0-mini" + + case playDialog = "PlayDialog" } } diff --git a/Sources/PlayHT/Intramodular/URL++.swift b/Sources/PlayHT/Intramodular/URL++.swift new file mode 100644 index 00000000..622b5933 --- /dev/null +++ b/Sources/PlayHT/Intramodular/URL++.swift @@ -0,0 +1,78 @@ +// +// URL++.swift +// AI +// +// Created by Jared Davidson on 1/14/25. 
+// + +import AVFoundation +import AudioToolbox + +// FIXME: - This needs to be moved somewhere else (@archetapp) + +extension URL { + func convertAudioToMP4() async throws -> URL { + let outputURL = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension("mp4") + + let asset = AVURLAsset(url: self) + + let composition = AVMutableComposition() + guard let compositionTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Could not create composition track"]) + } + + guard let audioTrack = try await asset.loadTracks(withMediaType: .audio).first else { + throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "No audio track found"]) + } + + let timeRange = CMTimeRange(start: .zero, duration: try await asset.load(.duration)) + for i in 0..<4 { + try compositionTrack.insertTimeRange( + timeRange, + of: audioTrack, + at: CMTime(seconds: Double(i) * timeRange.duration.seconds, preferredTimescale: 600) + ) + } + + guard let exportSession = AVAssetExportSession( + asset: composition, + presetName: AVAssetExportPresetPassthrough + ) else { + throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Could not create export session"]) + } + + exportSession.outputURL = outputURL + exportSession.outputFileType = AVFileType.mp4 + exportSession.shouldOptimizeForNetworkUse = true + + // Create a tuple of values we need to check after export + try await withCheckedThrowingContinuation { continuation in + exportSession.exportAsynchronously { + Task { @MainActor in + switch exportSession.status { + case .completed: + continuation.resume() + case .failed: + continuation.resume(throwing: exportSession.error ?? 
NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Export failed"])) + case .cancelled: + continuation.resume(throwing: NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Export cancelled"])) + default: + continuation.resume(throwing: NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Unknown export error"])) + } + } + } + } + + let fileSize = try FileManager.default.attributesOfItem(atPath: outputURL.path)[.size] as? Int ?? 0 + if fileSize < 5000 { // 5KB minimum + throw NSError(domain: "AudioConversion", code: -1, userInfo: [NSLocalizedDescriptionKey: "Converted file too small"]) + } + + return outputURL + } +} diff --git a/Sources/Rime/Intramodular/Models/Rime.Voice.swift b/Sources/Rime/Intramodular/Models/Rime.Voice.swift index 1a341b41..459a19ba 100644 --- a/Sources/Rime/Intramodular/Models/Rime.Voice.swift +++ b/Sources/Rime/Intramodular/Models/Rime.Voice.swift @@ -6,10 +6,32 @@ // import Foundation +import CorePersistence import Swallow +import LargeLanguageModels extension Rime { public struct Voice: Hashable { + public typealias ID = _TypeAssociatedID + + public init( + name: String, + age: String?, + country: String?, + region: String?, + demographic: String?, + genre: [String]? + ) { + self.id = .init(rawValue: UUID().uuidString) + self.name = name + self.age = age + self.country = country + self.region = region + self.demographic = demographic + self.genre = genre + } + + public let id: ID public let name: String public let age: String? public let country: String? 
@@ -42,5 +64,30 @@ extension Rime.Voice: Codable { self.region = try container.decodeIfPresent(String.self, forKey: Rime.Voice.CodingKeys.region) self.demographic = try container.decodeIfPresent(String.self, forKey: Rime.Voice.CodingKeys.demographic) self.genre = try container.decodeIfPresent([String].self, forKey: Rime.Voice.CodingKeys.genre) + + self.id = .init(rawValue: UUID().uuidString) + } +} + +extension Rime.Voice: AbstractVoiceInitiable { + public init(voice: AbstractVoice) throws { + self.init( + name: voice.name, + age: nil, + country: nil, + region: nil, + demographic: nil, + genre: nil + ) + } +} + +extension Rime.Voice: AbstractVoiceConvertible { + public func __conversion() throws -> AbstractVoice { + return AbstractVoice( + voiceID: self.id.rawValue, + name: self.name, + description: nil + ) } } diff --git a/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift b/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift new file mode 100644 index 00000000..f4bd5023 --- /dev/null +++ b/Sources/Rime/Intramodular/Rime.Client+SpeechSynthesisRequestHandling.swift @@ -0,0 +1,42 @@ +// +// Rime+SpeechSynthesisRequestHandling.swift +// Voice +// +// Created by Jared Davidson on 11/21/24. 
+// + +import LargeLanguageModels +import Foundation +import SwiftUI +import AVFoundation + +extension Rime.Client: SpeechSynthesisRequestHandling { + public func availableVoices() async throws -> [AbstractVoice] { + return try await getAllAvailableVoiceDetails().map { try $0.__conversion() } + } + + public func speech(for text: String, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + return try await streamTextToSpeech( + text: text, + voice: voiceID, + outputAudio: .MP3, + model: .mist + ) + } + + public func speechToSpeech(inputAudioURL: URL, voiceID: String, voiceSettings: AbstractVoiceSettings, model: String) async throws -> Data { + throw Rime.APIError.unknown(message: "Speech to speech not supported") + } + + public func upload(voiceWithName name: String, description: String, fileURL: URL) async throws -> AbstractVoice.ID { + throw Rime.APIError.unknown(message: "Voice creation is not supported") + } + + public func edit(voice: AbstractVoice.ID, name: String, description: String, fileURL: URL?) 
async throws -> Bool { + throw Rime.APIError.unknown(message: "Voice creation is not supported") + } + + public func delete(voice: AbstractVoice.ID) async throws { + throw Rime.APIError.unknown(message: "Voice creation is not supported") + } +} diff --git a/Sources/TogetherAI/Intramodular/TogetherAI.APISpecification.swift b/Sources/TogetherAI/Intramodular/TogetherAI.APISpecification.swift index 07b41904..0416b7a8 100644 --- a/Sources/TogetherAI/Intramodular/TogetherAI.APISpecification.swift +++ b/Sources/TogetherAI/Intramodular/TogetherAI.APISpecification.swift @@ -41,6 +41,10 @@ extension TogetherAI { @POST @Path("embeddings") public var createEmbeddings = Endpoint() + + @POST + @Path("completions") + public var createCompletion = Endpoint() } } @@ -122,7 +126,93 @@ extension TogetherAI.APISpecification { extension TogetherAI.APISpecification.RequestBodies { public struct CreateEmbedding: Codable, Hashable { - public let model: TogetherAI.Model - public let input: String + public let model: TogetherAI.Model.Embedding + public let input: [String] + } + + public struct CreateCompletion: Codable, Hashable { + + private enum CodingKeys: String, CodingKey { + case model + case prompt + case maxTokens = "max_tokens" + case stream + case stop + case temperature + case topP = "top_p" + case topK = "top_k" + case repetitionPenalty = "repetition_penalty" + case logprobs + case echo + case choices = "n" + case safetyModel = "safety_model" + } + + public let model: TogetherAI.Model.Completion + public let prompt: String + + // The maximum number of tokens to generate. + // Defaults to 200 + public let maxTokens: Int? + + // If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. If false, return a single JSON object containing the results. + public let stream: Bool? + + // A list of string sequences that will truncate (stop) inference text output. 
For example, "</s>" will stop generation as soon as the model generates the given token.
+        public let stop: [String]?
+        
+        // A decimal number that determines the degree of randomness in the response. A value of 1 will always yield the same output. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value greater than 1 introduces more randomness in the output.
+        public let temperature: Double?
+        
+        // The top_p (nucleus) parameter is used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold, below which all less likely tokens are filtered out. This technique helps to maintain diversity and generate more fluent and natural-sounding text.
+        public let topP: Double?
+        
+        // The top_k parameter is used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.
+        public let topK: Double?
+        
+        // A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
+        public let repetitionPenalty: Double?
+        
+        // Number of top-k logprobs to return
+        public let logprobs: Int?
+        
+        // Echo prompt in output. Can be used with logprobs to return prompt logprobs.
+        public let echo: Bool?
+        
+        // How many completions to generate for each prompt
+        public let choices: Int?
+        
+        // A moderation model to validate tokens. Choice between available moderation models found here: https://docs.together.ai/docs/inference-models#moderation-models
+        public let safetyModel: String?
+        
+        public init(
+            model: TogetherAI.Model.Completion,
+            prompt: String,
+            maxTokens: Int?,
+            stream: Bool? = nil,
+            stop: [String]?
= nil, + temperature: Double? = nil, + topP: Double? = nil, + topK: Double? = nil, + repetitionPenalty: Double? = nil, + logprobs: Int? = nil, + echo: Bool? = nil, + choices: Int? = nil, + safetyModel: String? = nil + ) { + self.model = model + self.prompt = prompt + self.maxTokens = maxTokens ?? 200 + self.stream = stream + self.stop = stop + self.temperature = temperature + self.topP = topP + self.topK = topK + self.repetitionPenalty = repetitionPenalty + self.logprobs = logprobs + self.echo = echo + self.choices = choices + self.safetyModel = safetyModel + } } } diff --git a/Sources/TogetherAI/Intramodular/TogetherAI.Client+LLMRequestHandling.swift b/Sources/TogetherAI/Intramodular/TogetherAI.Client+LLMRequestHandling.swift new file mode 100644 index 00000000..27526ffa --- /dev/null +++ b/Sources/TogetherAI/Intramodular/TogetherAI.Client+LLMRequestHandling.swift @@ -0,0 +1,128 @@ +// +// Copyright (c) Vatsal Manot +// + +import CoreMI +import CorePersistence +import Diagnostics +@_spi(Internal) import LargeLanguageModels +import Merge +import Swallow + +extension TogetherAI.Client: _TaskDependenciesExporting { + public var _exportedTaskDependencies: TaskDependencies { + var result = TaskDependencies() + + result[\.llm] = self + result[\.embedding] = self + + return result + } +} + +extension TogetherAI.Client: LLMRequestHandling { + private var _debugPrintCompletions: Bool { + false + } + + public var _availableModels: [ModelIdentifier]? 
{ + return TogetherAI.Model.allCases.map({ $0.__conversion() }) + } + + public func complete( + prompt: Prompt, + parameters: Prompt.CompletionParameters + ) async throws -> Prompt.Completion { + let _completion: Any + + switch prompt { + case let prompt as AbstractLLM.TextPrompt: + _completion = try await _complete( + prompt: prompt, + parameters: try cast(parameters) + ) + default: + throw LLMRequestHandlingError.unsupportedPromptType(Prompt.self) + } + + return try cast(_completion) + } + + private func _complete( + prompt: AbstractLLM.TextPrompt, + parameters: AbstractLLM.TextCompletionParameters + ) async throws -> AbstractLLM.TextCompletion { + let parameters = try cast(parameters, to: AbstractLLM.TextCompletionParameters.self) + + let model = TogetherAI.Model.Completion.mixtral8x7b + + let promptText = try prompt.prefix.promptLiteral + let completion = try await + self.createCompletion( + for: model, + prompt: promptText._stripToText(), + maxTokens: parameters.tokenLimit.fixedValue, + stop: parameters.stops, + temperature: parameters.temperatureOrTopP?.temperature, + topP: parameters.temperatureOrTopP?.topProbabilityMass + ) + + let text = try completion.choices.toCollectionOfOne().first.text + + _debugPrint( + prompt: prompt.debugDescription + .delimited(by: .quotationMark) + .delimited(by: "\n") + , + completion: text + .delimited(by: .quotationMark) + .delimited(by: "\n") + ) + + + return .init(prefix: promptText, text: text) + } +} + +extension TogetherAI.Client { + private func _debugPrint(prompt: String, completion: String) { + guard _debugPrintCompletions else { + return + } + + guard _isDebugAssertConfiguration else { + return + } + + let description = String.concatenate(separator: "\n") { + "=== [PROMPT START] ===" + prompt.debugDescription + .delimited(by: .quotationMark) + .delimited(by: "\n") + "==== [COMPLETION] ====" + completion + .delimited(by: .quotationMark) + .delimited(by: "\n") + "==== [PROMPT END] ====" + } + + print(description) + } +} 
+ +// MARK: - Auxiliary + +extension ModelIdentifier { + + public init( + from model: TogetherAI.Model.Completion + ) { + self.init(provider: .togetherAI, name: model.rawValue, revision: nil) + } + + public init( + from model: TogetherAI.Model.Embedding + ) { + self.init(provider: .togetherAI, name: model.rawValue, revision: nil) + } +} diff --git a/Sources/TogetherAI/Intramodular/TogetherAI.Client+TextEmbeddingsRequestHandling.swift b/Sources/TogetherAI/Intramodular/TogetherAI.Client+TextEmbeddingsRequestHandling.swift new file mode 100644 index 00000000..e7c6e816 --- /dev/null +++ b/Sources/TogetherAI/Intramodular/TogetherAI.Client+TextEmbeddingsRequestHandling.swift @@ -0,0 +1,40 @@ +// +// Copyright (c) Vatsal Manot +// + +import CoreMI +import CorePersistence + +extension TogetherAI.Client: TextEmbeddingsRequestHandling { + public func fulfill( + _ request: TextEmbeddingsRequest + ) async throws -> TextEmbeddings { + guard !request.input.isEmpty else { + return TextEmbeddings( + model: .init(from: TogetherAI.Model.Embedding.togetherM2Bert80M2KRetrieval), + data: [] + ) + } + + let model: ModelIdentifier = request.model ?? 
ModelIdentifier(from: TogetherAI.Model.Embedding.togetherM2Bert80M2KRetrieval) + let embeddingModel = try TogetherAI.Model.Embedding(rawValue: model.name).unwrap() + + let embeddings = try await createEmbeddings( + for: embeddingModel, + input: request.input + ).data + + try _tryAssert(request.input.count == embeddings.count) + + return TextEmbeddings( + model: .init(from: TogetherAI.Model.Embedding.togetherM2Bert80M2KRetrieval), + data: request.input.zip(embeddings).map { + TextEmbeddings.Element( + text: $0, + embedding: $1.embedding, + model: model + ) + } + ) + } +} diff --git a/Sources/TogetherAI/Intramodular/TogetherAI.Client.swift b/Sources/TogetherAI/Intramodular/TogetherAI.Client.swift index 6601259f..5948d898 100644 --- a/Sources/TogetherAI/Intramodular/TogetherAI.Client.swift +++ b/Sources/TogetherAI/Intramodular/TogetherAI.Client.swift @@ -53,8 +53,8 @@ extension TogetherAI.Client: CoreMI._ServiceClientProtocol { extension TogetherAI.Client { public func createEmbeddings( - for model: TogetherAI.Model, - input: String + for model: TogetherAI.Model.Embedding, + input: [String] ) async throws -> TogetherAI.Embeddings { try await run( \.createEmbeddings, @@ -64,4 +64,52 @@ extension TogetherAI.Client { ) ) } + + public func createEmbeddings( + for model: TogetherAI.Model.Embedding, + input: String + ) async throws -> TogetherAI.Embeddings { + try await run( + \.createEmbeddings, + with: .init( + model: model, + input: [input] + ) + ) + } + + public func createCompletion( + for model: TogetherAI.Model.Completion, + prompt: String, + maxTokens: Int? = nil, + stream: Bool? = nil, + stop: [String]? = nil, + temperature: Double? = nil, + topP: Double? = nil, + topK: Double? = nil, + repetitionPenalty: Double? = nil, + logprobs: Int? = nil, + echo: Bool? = nil, + choices: Int? = nil, + safetyModel: String? 
= nil + ) async throws -> TogetherAI.Completion { + try await run( + \.createCompletion, + with: .init( + model: model, + prompt: prompt, + maxTokens: maxTokens, + stream: stream, + stop: stop, + temperature: temperature, + topP: topP, + topK: topK, + repetitionPenalty: repetitionPenalty, + logprobs: logprobs, + echo: echo, + choices: choices, + safetyModel: safetyModel + ) + ) + } } diff --git a/Sources/TogetherAI/Intramodular/TogetherAI.Completion.swift b/Sources/TogetherAI/Intramodular/TogetherAI.Completion.swift new file mode 100644 index 00000000..e65c4391 --- /dev/null +++ b/Sources/TogetherAI/Intramodular/TogetherAI.Completion.swift @@ -0,0 +1,41 @@ +// +// Copyright (c) Vatsal Manot +// + +import NetworkKit +import Swift + +extension TogetherAI { + public final class Completion: Codable, Sendable { + private enum CodingKeys: String, CodingKey { + case id + case object + case model + case createdAt = "created" + case choices + case usage + } + + public struct Choice: Codable, Hashable, Sendable { + public let text: String + public let index: Int + public let seed: Double + public let finishReason: String + } + + public struct Usage: Codable, Hashable, Sendable { + public let promptTokens: Int + public let completionTokens: Int + public let totalTokens: Int + } + + + public let id: String + public let model: TogetherAI.Model.Completion + public let object: String + public let createdAt: Date + public let choices: [Choice] + public let usage: Usage + } +} + diff --git a/Sources/TogetherAI/Intramodular/TogetherAI.Embeddings.swift b/Sources/TogetherAI/Intramodular/TogetherAI.Embeddings.swift index a30e8576..02464834 100644 --- a/Sources/TogetherAI/Intramodular/TogetherAI.Embeddings.swift +++ b/Sources/TogetherAI/Intramodular/TogetherAI.Embeddings.swift @@ -15,7 +15,7 @@ extension TogetherAI { extension TogetherAI.Embeddings { public struct EmbeddingData: Codable, Hashable, Sendable { public let object: String - public let embedding: [Float] + public let 
embedding: [Double] public let index: Int } } diff --git a/Sources/TogetherAI/Intramodular/TogetherAI.Model.swift b/Sources/TogetherAI/Intramodular/TogetherAI.Model.swift index 316cf1d3..e12b71af 100644 --- a/Sources/TogetherAI/Intramodular/TogetherAI.Model.swift +++ b/Sources/TogetherAI/Intramodular/TogetherAI.Model.swift @@ -7,8 +7,61 @@ import CorePersistence import LargeLanguageModels import Swallow +public protocol _TogetherAI_ModelType: Codable, Hashable, RawRepresentable, Sendable where RawValue == String { + var contextSize: Int { get throws } +} + extension TogetherAI { - public enum Model: String, CaseIterable, Codable, Hashable, Named, Sendable { + public typealias _ModelType = _TogetherAI_ModelType +} + +extension TogetherAI { + public enum Model: CaseIterable, _TogetherAI_ModelType, Hashable { + public private(set) static var allCases: [Model] = { + var result: [Model] = [] + + result += Embedding.allCases.map({ Self.embedding($0 )}) + result += Completion.allCases.map({ Self.completion($0) }) + + return result + }() + + case completion(Completion) + case embedding(Embedding) + case unknown(String) + + public var name: String { + if let base = (base as? 
any Named) { + return base.name.description + } else { + return base.rawValue + } + } + + private var base: any TogetherAI._ModelType { + switch self { + case .completion(let value): + return value + case .embedding(let value): + return value + case .unknown: + assertionFailure(.unimplemented) + + return self + } + } + + public var contextSize: Int { + get throws { + try base.contextSize + } + } + } +} + +extension TogetherAI.Model { + public enum Embedding: String, TogetherAI._ModelType, CaseIterable { + // Together Models case togetherM2Bert80M2KRetrieval = "togethercomputer/m2-bert-80M-2k-retrieval" case togetherM2Bert80M8KRetrieval = "togethercomputer/m2-bert-80M-8k-retrieval" @@ -29,21 +82,21 @@ extension TogetherAI { public var name: String { switch self { - case .togetherM2Bert80M2KRetrieval: + case .togetherM2Bert80M2KRetrieval: return "M2-BERT-80M-2K-Retrieval" - case .togetherM2Bert80M8KRetrieval: + case .togetherM2Bert80M8KRetrieval: return "M2-BERT-80M-8K-Retrieval" - case .togetherM2Bert80M32KRetrieval: + case .togetherM2Bert80M32KRetrieval: return "M2-BERT-80M-32K-Retrieval" - case .whereIsAIUAELargeV1: + case .whereIsAIUAELargeV1: return "UAE-Large-v1" - case .baaiLargeENV15: + case .baaiLargeENV15: return "BGE-Large-EN-v1.5" - case .baaiBaseENV15: + case .baaiBaseENV15: return "BGE-Base-EN-v1.5" - case .sentenceBERT: + case .sentenceBERT: return "Sentence-BERT" - case .googleBERTBaseUncased: + case .googleBERTBaseUncased: return "BERT" } } @@ -72,47 +125,141 @@ extension TogetherAI { } } - public var contextWindow: Int { + public var contextSize: Int { switch self { - case .togetherM2Bert80M2KRetrieval: + case .togetherM2Bert80M2KRetrieval: return 2048 - case .togetherM2Bert80M8KRetrieval: + case .togetherM2Bert80M8KRetrieval: return 8192 - case .togetherM2Bert80M32KRetrieval: + case .togetherM2Bert80M32KRetrieval: return 32768 case .whereIsAIUAELargeV1, .baaiLargeENV15, .baaiBaseENV15, .sentenceBERT, .googleBERTBaseUncased: return 512 } } + + 
public init?(rawValue: String) { + switch rawValue { + case "togethercomputer/m2-bert-80M-2k-retrieval": + self = .togetherM2Bert80M2KRetrieval + case "togethercomputer/m2-bert-80M-8k-retrieval": + self = .togetherM2Bert80M8KRetrieval + case "togethercomputer/m2-bert-80M-32k-retrieval": + self = .togetherM2Bert80M32KRetrieval + case "WhereIsAI/UAE-Large-V1": + self = .whereIsAIUAELargeV1 + case "BAAI/bge-large-en-v1.5": + self = .baaiLargeENV15 + case "BAAI/bge-base-en-v1.5": + self = .baaiBaseENV15 + case "sentence-transformers/msmarco-bert-base-dot-v5": + self = .sentenceBERT + case "bert-base-uncased": + self = .googleBERTBaseUncased + default: + return nil + } + } + } +} + +extension TogetherAI.Model { + public enum Completion: String, TogetherAI._ModelType, CaseIterable { + + case llama2_70B = "meta-llama/Llama-2-70b-hf" + case mistral7b = "mistralai/Mistral-7B-v0.1" + case mixtral8x7b = "mistralai/Mixtral-8x7B-v0.1" + + public var name: String { + switch self { + case .llama2_70B: + return "LLaMA-2 (70B)" + case .mistral7b: + return "Mistral (7B)" + case .mixtral8x7b: + return "Mixtral-8x7B (46.7B)" + } + } + + public var contextSize: Int { + switch self { + case .llama2_70B: + return 4096 + case .mistral7b: + return 8192 + case .mixtral8x7b: + return 32768 + } + } + + public init?(rawValue: String) { + switch rawValue { + case "meta-llama/Llama-2-70b-hf": + self = .llama2_70B + case "mistralai/Mistral-7B-v0.1": + self = .mistral7b + case "mistralai/Mixtral-8x7B-v0.1": + self = .mixtral8x7b + default: + return nil + } + } } } // MARK: - Conformances -extension TogetherAI.Model: CustomStringConvertible { - public var description: String { - rawValue +extension TogetherAI.Model: Codable { + public init(from decoder: Decoder) throws { + self = try Self(rawValue: try String(from: decoder)).unwrap() + } + + public func encode(to encoder: Encoder) throws { + try rawValue.encode(to: encoder) } } extension TogetherAI.Model: ModelIdentifierRepresentable { - public 
init(from identifier: ModelIdentifier) throws { - guard identifier.provider == ._TogetherAI, identifier.revision == nil else { - throw Never.Reason.illegal - } - - guard let model = Self(rawValue: identifier.name) else { - throw Never.Reason.unexpected + private enum _DecodingError: Error { + case invalidModelProvider + } + + public init(from model: ModelIdentifier) throws { + guard model.provider == .togetherAI else { + throw _DecodingError.invalidModelProvider } - self = model + self = try Self(rawValue: model.name).unwrap() } public func __conversion() -> ModelIdentifier { ModelIdentifier( - provider: ._TogetherAI, + provider: .togetherAI, name: rawValue, revision: nil ) } } + +extension TogetherAI.Model: RawRepresentable { + public var rawValue: String { + switch self { + case .completion(let model): + return model.rawValue + case .embedding(let model): + return model.rawValue + case .unknown(let rawValue): + return rawValue + } + } + + public init?(rawValue: String) { + if let model = Completion(rawValue: rawValue) { + self = .completion(model) + } else if let model = Embedding(rawValue: rawValue) { + self = .embedding(model) + } else { + self = .unknown(rawValue) + } + } +} diff --git a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.RequestBodies.swift b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.RequestBodies.swift index 7003aebd..0eaaa91f 100644 --- a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.RequestBodies.swift +++ b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.RequestBodies.swift @@ -142,10 +142,31 @@ extension _Gemini.APISpecification { } } - public struct FileUploadInput: Codable, HTTPRequest.Multipart.ContentConvertible { + public struct FinalizeFileUploadInput { + public let data: Data + public let uploadUrl: String + public let fileSize: Int + + public init(data: Data, uploadUrl: String, fileSize: Int) { + self.data = data + self.uploadUrl = uploadUrl + self.fileSize = fileSize + } + } + + public 
struct StartFileUploadInput: Codable { + public struct UploadMetadata: Codable { + let file: FileMetadata + + struct FileMetadata: Codable { + let display_name: String + } + } + public let fileData: Data public let mimeType: String public let displayName: String + public let metadata: UploadMetadata public init( fileData: Data, @@ -155,11 +176,12 @@ extension _Gemini.APISpecification { self.fileData = fileData self.mimeType = mimeType self.displayName = displayName + self.metadata = .init(file: .init(display_name: displayName)) } - + /* public func __conversion() throws -> HTTPRequest.Multipart.Content { var result = HTTPRequest.Multipart.Content() - + // TODO: - Add this to `HTTPMediaType` @jared @vmanot let fileExtension: String = { guard let subtype = mimeType.split(separator: "/").last else { @@ -188,17 +210,11 @@ extension _Gemini.APISpecification { } }() - result.append( - .file( - named: "file", - data: fileData, - filename: "\(displayName).\(fileExtension)", - contentType: .init(rawValue: mimeType) - ) - ) + result.ap return result } + */ } public struct DeleteFileInput: Codable { diff --git a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift index fe289d07..587217f2 100644 --- a/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift +++ b/Sources/_Gemini/Intramodular/API/_Gemini.APISpecification.swift @@ -70,22 +70,43 @@ extension _Gemini { "/v1beta/models/\(context.input.model):generateContent" }) @Body(json: \.requestBody) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var generateContent = Endpoint() // Initial Upload Request endpoint @POST @Path("/upload/v1beta/files") - @Header([ - "X-Goog-Upload-Command": "start, upload, finalize" - ]) - @Body(multipart: .input) - var uploadFile = Endpoint() + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? 
[:] }) + @Header({ context in + [ + HTTPHeaderField(key: "X-Goog-Upload-Protocol", value: "resumable"), + HTTPHeaderField(key: "X-Goog-Upload-Command", value: "start"), + HTTPHeaderField(key: "X-Goog-Upload-Header-Content-Length", value: "\(context.input.fileData.count)"), + HTTPHeaderField(key: "X-Goog-Upload-Header-Content-Type", value: context.input.mimeType), + HTTPHeaderField.contentType(.json) + ] + }) + @Body(json: \RequestBodies.StartFileUploadInput.metadata) + var startFileUpload = Endpoint() + + @POST + @AbsolutePath({ $0.input.uploadUrl }) + @Header({ context in + [ + HTTPHeaderField(key: "Content-Length", value: "\(context.input.fileSize)"), + HTTPHeaderField(key: "X-Goog-Upload-Offset", value: "0"), + HTTPHeaderField(key: "X-Goog-Upload-Command", value: "upload, finalize") + ] + }) + @Body(data: \RequestBodies.FinalizeFileUploadInput.data) + var finalizeFileUpload = Endpoint() // File Status endpoint @GET @Path({ context -> String in "/v1beta/\(context.input.name.rawValue)" }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var getFile = Endpoint() @GET @@ -101,8 +122,13 @@ extension _Gemini { parameters["pageToken"] = pageToken } + if let apiKey = context.root.configuration.apiKey { + parameters["key"] = apiKey + } + return parameters }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var listFiles = Endpoint() // Delete File endpoint @@ -110,24 +136,28 @@ extension _Gemini { @Path({ context -> String in "/\(context.input.fileURL.path)" }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var deleteFile = Endpoint() - //Fine Tuning + // Fine Tuning @POST @Path("/v1beta/tunedModels") @Body(json: \.requestBody) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var createTunedModel = Endpoint() @GET @Path({ context -> String in "/v1/\(context.input.operationName)" }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? 
[:] }) var getTuningOperation = Endpoint() @GET @Path({ context -> String in "/v1beta/\(context.input.modelName)" }) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var getTunedModel = Endpoint() @POST @@ -135,6 +165,7 @@ extension _Gemini { "/v1beta/\(context.input.model):generateContent" // Use the model name directly }) @Body(json: \.requestBody) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var generateTunedContent = Endpoint() @POST @@ -142,6 +173,7 @@ extension _Gemini { "/v1beta/models/\(context.input.model):embedContent" }) @Body(json: \.input) + @Query({ $0.root.configuration.apiKey.map { ["key": $0] } ?? [:] }) var generateEmbedding = Endpoint() } } @@ -152,15 +184,11 @@ extension _Gemini.APISpecification { from input: Input, context: BuildRequestContext ) throws -> Request { - var request = try super.buildRequestBase( + let request = try super.buildRequestBase( from: input, context: context ) - if let apiKey = context.root.configuration.apiKey { - request = request.query([.init(name: "key", value: apiKey)]) - } - return request } @@ -168,15 +196,33 @@ extension _Gemini.APISpecification { from response: Request.Response, context: DecodeOutputContext ) throws -> Output { - - print(response) - try response.validate() + if let options: _Gemini.APISpecification.Options = context.options as? _Gemini.APISpecification.Options, let headerKey = options.outputHeaderKey { + let stringValue: String? = response.headerFields.first (where: { $0.key == headerKey })?.value + + switch Output.self { + case String.self: + return (try stringValue.unwrap()) as! Output + case Optional.self: + return stringValue as! Output + default: + throw _Gemini.APIError.invalidContentType + } + } + return try response.decode( Output.self, keyDecodingStrategy: .convertFromSnakeCase ) } } + + public class Options { + var outputHeaderKey: HTTPHeaderField.Key? + + init(outputHeaderKey: HTTPHeaderField.Key? 
= nil) { + self.outputHeaderKey = outputHeaderKey + } + } } diff --git a/Sources/_Gemini/Intramodular/Models/_Gemini.GenerationConfig.swift b/Sources/_Gemini/Intramodular/Models/_Gemini.GenerationConfig.swift index e352e195..45b147af 100644 --- a/Sources/_Gemini/Intramodular/Models/_Gemini.GenerationConfig.swift +++ b/Sources/_Gemini/Intramodular/Models/_Gemini.GenerationConfig.swift @@ -6,6 +6,7 @@ // import Foundation +import CorePersistence extension _Gemini { public struct GenerationConfiguration: Codable { @@ -114,3 +115,25 @@ extension _Gemini.SchemaObject: Codable { } } } + +// MARK: - Conversion + +extension _Gemini.SchemaObject { + public init(from jsonSchema: JSONSchema) { + switch jsonSchema.type { + case .object: + let mappedProperties = jsonSchema.properties?.mapValues { _Gemini.SchemaObject(from: $0) } ?? [:] + self = .object(properties: mappedProperties) + case .array: + self = .array(items: _Gemini.SchemaObject(from: jsonSchema.items ?? JSONSchema(type: .string))) + case .string: + self = .string + case .number, .integer: + self = .number + case .boolean: + self = .boolean + case nil: + self = .object(properties: [:]) + } + } +} diff --git a/Sources/_Gemini/Intramodular/Models/_Gemini.GoogleSearchRetrieval.swift b/Sources/_Gemini/Intramodular/Models/_Gemini.GoogleSearchRetrieval.swift index 775464d9..0029e5b0 100644 --- a/Sources/_Gemini/Intramodular/Models/_Gemini.GoogleSearchRetrieval.swift +++ b/Sources/_Gemini/Intramodular/Models/_Gemini.GoogleSearchRetrieval.swift @@ -15,7 +15,7 @@ extension _Gemini { public let dynamicRetrievalConfiguration: DynamicRetrievalConfiguration - public init(dynamicRetrievalConfiguration: DynamicRetrievalConfiguration) { + public init(dynamicRetrievalConfiguration: DynamicRetrievalConfiguration = .init(dynamicThreshold: 0.3)) { self.dynamicRetrievalConfiguration = dynamicRetrievalConfiguration } } diff --git a/Sources/_Gemini/Intramodular/_Gemini.Client+ContentGeneration.swift 
b/Sources/_Gemini/Intramodular/_Gemini.Client+ContentGeneration.swift index 174b0e1e..7bb8f683 100644 --- a/Sources/_Gemini/Intramodular/_Gemini.Client+ContentGeneration.swift +++ b/Sources/_Gemini/Intramodular/_Gemini.Client+ContentGeneration.swift @@ -26,6 +26,7 @@ extension _Gemini.Client { public func generateContent( messages: [_Gemini.Message] = [], file: _Gemini.File? = nil, + tools: [_Gemini.Tool] = [], mimeType: HTTPMediaType? = nil, model: _Gemini.Model, configuration: _Gemini.GenerationConfiguration = configDefault @@ -58,6 +59,7 @@ extension _Gemini.Client { return try await generateContent( contents: contents, systemInstruction: systemInstruction, + tools: tools, model: model, configuration: configuration ) @@ -66,6 +68,7 @@ extension _Gemini.Client { public func generateContent( messages: [_Gemini.Message] = [], files: [_Gemini.File], + tools: [_Gemini.Tool] = [], model: _Gemini.Model, configuration: _Gemini.GenerationConfiguration = configDefault ) async throws -> _Gemini.Content { @@ -96,6 +99,7 @@ extension _Gemini.Client { return try await generateContent( contents: contents, systemInstruction: systemInstruction, + tools: tools, model: model, configuration: configuration ) @@ -104,6 +108,7 @@ extension _Gemini.Client { internal func generateContent( contents: [_Gemini.APISpecification.RequestBodies.Content], systemInstruction: _Gemini.APISpecification.RequestBodies.Content?, + tools: [_Gemini.Tool] = [], model: _Gemini.Model, configuration: _Gemini.GenerationConfiguration ) async throws -> _Gemini.Content { @@ -112,6 +117,7 @@ extension _Gemini.Client { requestBody: .init( contents: contents, generationConfig: configuration, + tools: tools, systemInstruction: systemInstruction ) ) diff --git a/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift b/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift index b6fa298c..020e3cde 100644 --- a/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift +++ 
b/Sources/_Gemini/Intramodular/_Gemini.Client+Files.swift @@ -6,9 +6,21 @@ import CoreMI import Dispatch import FoundationX import Merge +import Media import NetworkKit import Swallow +fileprivate enum TempError: CustomStringError, Error { + case fetchedResponse + + public var description: String { + switch self { + case .fetchedResponse: + return "Got response url from header" + } + } +} + extension _Gemini.Client { public func uploadFile( from data: Data, @@ -20,25 +32,27 @@ extension _Gemini.Client { throw FileProcessingError.invalidFileName } - do { - var mimeType: String? = mimeType?.rawValue ?? _MediaAssetFileType(data)?.mimeType - - if mimeType == nil, let swiftType { - mimeType = HTTPMediaType(_swiftType: swiftType)?.rawValue - } - - let input = _Gemini.APISpecification.RequestBodies.FileUploadInput( - fileData: data, - mimeType: try mimeType.unwrap(), - displayName: displayName - ) - - let response = try await run(\.uploadFile, with: input) - - return response.file - } catch { - throw _Gemini.APIError.unknown(message: "File upload failed: \(error.localizedDescription)") + var mimeType: String? = mimeType?.rawValue ?? 
_MediaAssetFileType(data)?.mimeType + + if mimeType == nil, let swiftType { + mimeType = HTTPMediaType(_swiftType: swiftType)?.rawValue } + + let input = _Gemini.APISpecification.RequestBodies.StartFileUploadInput( + fileData: data, + mimeType: try mimeType.unwrap(), + displayName: displayName + ) + + let uploadURLString: String = try await run(\.startFileUpload, with: input, options: _Gemini.APISpecification.Options(outputHeaderKey: .custom("x-goog-upload-url"))).value + + let result: _Gemini.APISpecification.ResponseBodies.FileUpload = try await run(\.finalizeFileUpload, with: _Gemini.APISpecification.RequestBodies.FinalizeFileUploadInput(data: data, uploadUrl: uploadURLString, fileSize: data.count)) + + return result.file + } + + public func upload(file: any MediaFile) async throws { + try await self.uploadFile(from: file.url, mimeType: HTTPMediaType(fileURL: file.url), displayName: file.name) } public func uploadFile( diff --git a/Sources/_Gemini/Intramodular/_Gemini.Model.swift b/Sources/_Gemini/Intramodular/_Gemini.Model.swift index 672dc5e0..bf217489 100644 --- a/Sources/_Gemini/Intramodular/_Gemini.Model.swift +++ b/Sources/_Gemini/Intramodular/_Gemini.Model.swift @@ -23,6 +23,7 @@ extension _Gemini { self.rawValue = rawValue } + public static let gemini_2_0_flash = Model(rawValue: "gemini-2.0-flash") public static let gemini_2_0_flash_exp = Model(rawValue: "gemini-2.0-flash-exp") public static let gemini_1_5_pro = Model(rawValue: "gemini-1.5-pro") public static let gemini_1_5_pro_latest = Model(rawValue: "gemini-1.5-pro-latest") diff --git a/Sources/xAI/Intramodular/API/xAI.APISpecification.RequestBodies.swift b/Sources/xAI/Intramodular/API/xAI.APISpecification.RequestBodies.swift new file mode 100644 index 00000000..1753bfaf --- /dev/null +++ b/Sources/xAI/Intramodular/API/xAI.APISpecification.RequestBodies.swift @@ -0,0 +1,144 @@ + + +import Foundation + +extension xAI.APISpecification.RequestBodies { + + /* 
https://docs.x.ai/api/endpoints#chat-completions */ + struct ChatCompletions: Codable, Hashable, Sendable { + private enum CodingKeys: String, CodingKey { + case model + case messages + case temperature + case topProbabilityMass = "top_p" + case choices = "n" + case stream + case stop + case maxTokens = "max_tokens" + case presencePenalty = "presence_penalty" + case frequencyPenalty = "frequency_penalty" + case logprobs = "logprobs" + case logitBias = "logit_bias" + case seed = "seed" + case topLogprobs = "top_logprobs" + case user + case tools + } + + /* Model name for the model to use. */ + var model: xAI.Model + + /* A list of messages that make up the chat conversation. Different models support different message types, such as image and text.*/ + var messages: [xAI.ChatMessage] + + /* What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.*/ + var temperature: Double? + + /* An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or `temperature` but not both.*/ + var topProbabilityMass: Double? + + /*The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.*/ + var maxTokens: Int? + + /* If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.*/ + var stream: Bool? + + /* If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. 
Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.*/ + var seed: Int? + + /* Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. */ + var frequencyPenalty: Double? + + /* A JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.*/ + var logitBias: [String: Int]? + + /* Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. */ + var logprobs: Bool? + + /* How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs. */ + var choices: Int? + + /* Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. */ + var presencePenalty: Double? + + /* Up to 4 sequences where the API will stop generating further tokens. */ + var stop: [String]? + + /* An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. */ + var topLogprobs: Int? + + /* A unique identifier representing your end-user, which can help xAI to monitor and detect abuse. */ + var user: String? + + /* A list of tools the model may call. 
Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported. */ + var tools: [xAI.Tool]? + + init( + messages: [xAI.ChatMessage], + model: xAI.Model, + frequencyPenalty: Double? = nil, + logitBias: [String : Int]? = nil, + logprobs: Bool? = nil, + topLogprobs: Int? = nil, + maxTokens: Int? = nil, + choices: Int? = nil, + presencePenalty: Double? = nil, + seed: Int? = nil, + stop: [String]? = nil, + stream: Bool? = nil, + temperature: Double? = nil, + topProbabilityMass: Double? = nil, + user: String? = nil, + functions: [xAI.ChatFunctionDefinition]? = nil + ) { + self.messages = messages + self.model = model + self.frequencyPenalty = frequencyPenalty + self.logitBias = logitBias + self.logprobs = logprobs + self.topLogprobs = topLogprobs + self.maxTokens = maxTokens + self.choices = choices + self.presencePenalty = presencePenalty + self.seed = seed + self.stop = stop + self.stream = stream + self.temperature = temperature + self.topProbabilityMass = topProbabilityMass + self.user = user + self.tools = functions?.map { xAI.Tool.function($0) } + + } + + init( + user: String?, + messages: [xAI.ChatMessage], + model: xAI.Model, + temperature: Double?, + topProbabilityMass: Double?, + choices: Int?, + stream: Bool?, + stop: [String]?, + maxTokens: Int?, + presencePenalty: Double?, + frequencyPenalty: Double? 
+ ) { + self.user = user + self.messages = messages + self.model = model + self.temperature = temperature + self.topProbabilityMass = topProbabilityMass + self.choices = choices + self.stream = stream + self.stop = stop + self.maxTokens = maxTokens + self.presencePenalty = presencePenalty + self.frequencyPenalty = frequencyPenalty + + self.logitBias = nil + self.logprobs = nil + self.topLogprobs = nil + self.seed = nil + } + } +} diff --git a/Sources/xAI/Intramodular/API/xAI.APISpecification.ResponseBodies.swift b/Sources/xAI/Intramodular/API/xAI.APISpecification.ResponseBodies.swift new file mode 100644 index 00000000..6ca529f0 --- /dev/null +++ b/Sources/xAI/Intramodular/API/xAI.APISpecification.ResponseBodies.swift @@ -0,0 +1,6 @@ + +import Foundation + +extension xAI.APISpecification.ResponseBodies { + +} diff --git a/Sources/xAI/Intramodular/API/xAI.APISpecification.swift b/Sources/xAI/Intramodular/API/xAI.APISpecification.swift new file mode 100644 index 00000000..89be500e --- /dev/null +++ b/Sources/xAI/Intramodular/API/xAI.APISpecification.swift @@ -0,0 +1,120 @@ + + +import NetworkKit +import FoundationX +import Swallow + +extension xAI { + public enum APIError: APIErrorProtocol { + public typealias API = xAI.APISpecification + + case apiKeyMissing + case incorrectAPIKeyProvided + case rateLimitExceeded + case badRequest(request: API.Request?, error: API.Request.Error) + case runtime(AnyError) + + public var traits: ErrorTraits { + [.domain(.networking)] + } + } + + public struct APISpecification: RESTAPISpecification { + public typealias Error = APIError + + public struct Configuration: Codable, Hashable { + public var apiKey: String? + } + + public let configuration: Configuration + + public var host: URL { + URL(string: "https://api.x.ai/v1")! 
+ } + + public var id: some Hashable { + configuration + } + + @POST + @Path("chat/completions") + var chatCompletions = Endpoint() + } +} + +extension xAI.APISpecification { + public final class Endpoint: BaseHTTPEndpoint { + override public func buildRequestBase( + from input: Input, + context: BuildRequestContext + ) throws -> Request { + let configuration = context.root.configuration + + return try super + .buildRequestBase(from: input, context: context) + .jsonBody(input, keyEncodingStrategy: .convertToSnakeCase) + .header(.contentType(.json)) + .header(.accept(.json)) + .header(.authorization(.bearer, configuration.apiKey.unwrap())) + } + + struct _ErrorWrapper: Codable, Hashable, Sendable { + struct Error: Codable, Hashable, Sendable { + let type: String + let param: AnyCodable? + let message: String + } + + let error: Error + } + + override public func decodeOutputBase( + from response: Request.Response, + context: DecodeOutputContext + ) throws -> Output { + do { + try response.validate() + } catch { + let apiError: Error + + if let error = error as? Request.Error { + if let error = try? 
response.decode( + _ErrorWrapper.self, + keyDecodingStrategy: .convertFromSnakeCase + ).error { + if error.message.contains("You didn't provide an API key") { + throw Error.apiKeyMissing + } else if error.message.contains("Incorrect API key provided") { + throw Error.incorrectAPIKeyProvided + } + } + + if response.statusCode.rawValue == 429 { + apiError = .rateLimitExceeded + } else { + apiError = .badRequest(error) + } + } else { + apiError = .runtime(error) + } + + throw apiError + } + + return try response.decode( + Output.self, + keyDecodingStrategy: .convertFromSnakeCase + ) + } + } +} + +extension xAI.APISpecification { + public enum RequestBodies: _StaticSwift.Namespace { + + } + + public enum ResponseBodies: _StaticSwift.Namespace { + + } +} diff --git a/Sources/xAI/Intramodular/Models/xAI.ChatCompletion.swift b/Sources/xAI/Intramodular/Models/xAI.ChatCompletion.swift new file mode 100644 index 00000000..084aebdc --- /dev/null +++ b/Sources/xAI/Intramodular/Models/xAI.ChatCompletion.swift @@ -0,0 +1,36 @@ + + +import Foundation + +extension xAI { + public struct ChatCompletion: Codable, Hashable, Sendable { + + public struct Choice: Codable, Hashable, Sendable { + public enum FinishReason: String, Codable, Hashable, Sendable { + case stop = "stop" + case length = "length" + case modelLength = "model_length" + case toolCalls = "tool_calls" + } + + public let index: Int + public let message: ChatMessage + public let finishReason: FinishReason + } + + public struct Usage: Codable, Hashable, Sendable { + public let promptTokens: Int + public let completionTokens: Int + public let totalTokens: Int + } + + public var id: String + public var object: String + public var created: Date + public var model: Model + public var choices: [Choice] + public let usage: Usage + public let systemFingerprint: String + } +} + diff --git a/Sources/xAI/Intramodular/Models/xAI.ChatFunctionDefinition.swift b/Sources/xAI/Intramodular/Models/xAI.ChatFunctionDefinition.swift new file 
mode 100644 index 00000000..ca4b8169 --- /dev/null +++ b/Sources/xAI/Intramodular/Models/xAI.ChatFunctionDefinition.swift @@ -0,0 +1,17 @@ + +import CorePersistence + +extension xAI { + public struct ChatFunctionDefinition: Codable, Hashable, Sendable { + public let name: String + public let description: String + public let parameters: JSONSchema + + public init(name: String, description: String, parameters: JSONSchema) { + self.name = name + self.description = description + self.parameters = parameters + } + } +} + diff --git a/Sources/xAI/Intramodular/Models/xAI.ChatMessage.swift b/Sources/xAI/Intramodular/Models/xAI.ChatMessage.swift new file mode 100644 index 00000000..13b3f651 --- /dev/null +++ b/Sources/xAI/Intramodular/Models/xAI.ChatMessage.swift @@ -0,0 +1,210 @@ + +import CorePersistence +import Diagnostics +import LargeLanguageModels +import Swallow + +extension xAI { + public struct ChatMessage: Hashable, Sendable { + public typealias ID = String + + public let id: ID + public let role: ChatRole + public var body: ChatMessageBody + + public init( + id: ID? = nil, + role: ChatRole, + body: ChatMessageBody + ) { + switch body { + case .text: + assert(role != .function) + case .content: + assert(role != .function) + case .functionCall: + assert(role == .assistant) + case .toolCalls(_): + assert(role == .assistant) + case .functionInvocation: + assert(role == .function) + } + + self.id = id ?? UUID().stringValue // FIXME: !!! 
+ self.role = role + self.body = body + } + } + + public enum FunctionCallingStrategy: Codable, Hashable, Sendable { + enum CodingKeys: String, CodingKey { + case none = "none" + case auto = "auto" + case function = "name" + } + + case none + case auto + case function(String) + + public init(from decoder: Decoder) throws { + switch try decoder._determineContainerKind() { + case .singleValue: + let rawValue = try decoder.singleValueContainer().decode(String.self) + + switch rawValue { + case CodingKeys.none.rawValue: + self = .none + case CodingKeys.auto.rawValue: + self = .auto + default: + throw DecodingError.dataCorrupted(.init(codingPath: [])) + } + case .keyed: + let container = try decoder.container(keyedBy: CodingKeys.self) + + self = try .function(container.decode(String.self, forKey: .function)) + default: + throw DecodingError.dataCorrupted(.init(codingPath: [])) + } + } + + public func encode(to encoder: Encoder) throws { + switch self { + case .none: + var container = encoder.singleValueContainer() + + try container.encode(CodingKeys.none.rawValue) + case .auto: + var container = encoder.singleValueContainer() + + try container.encode(CodingKeys.auto.rawValue) + case .function(let name): + var container = encoder.container(keyedBy: CodingKeys.self) + + try container.encode(name, forKey: .function) + } + } + } +} + +// MARK: - Conformances + +extension xAI.ChatMessage: AbstractLLM.ChatMessageConvertible { + public func __conversion() throws -> AbstractLLM.ChatMessage { + .init( + id: .init(rawValue: id), + role: try role.__conversion(), + content: try PromptLiteral(from: self) + ) + } +} + +extension xAI.ChatMessage: Codable { + public enum CodingKeys: CodingKey { + case id + case role + case content + case name + case functionCall + case toolCalls + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + print(try JSON(from: decoder).prettyPrintedDescription) + + + self.id = try 
container.decodeIfPresent(String.self, forKey: .id) ?? UUID().stringValue // FIXME + self.role = try container.decode(xAI.ChatRole.self, forKey: .role) + + switch role { + case .function: + self.body = .functionInvocation( + .init( + name: try container.decode(String.self, forKey: .name), + response: try container.decode(String.self, forKey: .name) + ) + ) + case .assistant: + if let toolCalls = try container.decodeIfPresent([xAI.ToolCall].self, forKey: .toolCalls) { + if let function = toolCalls.first?.function { + self.body = .functionCall(function) + } else { + self.body = .toolCalls(toolCalls) + } + + } else { + self.body = try .content(container.decode(String.self, forKey: .content)) + } + default: + self.body = try .content(container.decode(String.self, forKey: .content)) + } + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + + try container.encode(role, forKey: .role) + + switch body { + case .text(let content): + try container.encode(content, forKey: .content) + case .content(let content): + try container.encode(content, forKey: .content) + case .functionCall(let call): + try _tryAssert(role == .assistant) + + try container.encode(call, forKey: .functionCall) + try container.encodeNil(forKey: .content) + case .toolCalls(let calls): + try _tryAssert(role == .assistant) + + try container.encode(calls, forKey: .toolCalls) + try container.encodeNil(forKey: .content) + case .functionInvocation(let invocation): + try _tryAssert(role == .function) + + try container.encode(invocation.name, forKey: .name) + try container.encode(invocation.response, forKey: .content) + } + } +} + +// MARK: - Initializers + +extension xAI.ChatMessage { + public init( + id: ID? 
= nil, + role: xAI.ChatRole, + body: String + ) { + self.init( + id: id, + role: role, + body: .content(body) + ) + } + + public init( + role: xAI.ChatRole, + content: String + ) { + self.init( + role: role, + body: content + ) + } + + public static func system( + _ content: String + ) -> Self { + Self(id: UUID().stringValue, role: .system, body: .content(content)) + } + + public static func user( + _ content: String + ) -> Self { + Self(id: UUID().stringValue, role: .user, body: .content(content)) + } +} diff --git a/Sources/xAI/Intramodular/Models/xAI.ChatMessageBody.swift b/Sources/xAI/Intramodular/Models/xAI.ChatMessageBody.swift new file mode 100644 index 00000000..1f9122e1 --- /dev/null +++ b/Sources/xAI/Intramodular/Models/xAI.ChatMessageBody.swift @@ -0,0 +1,215 @@ + + +import CorePersistence +import Diagnostics +import Swift + +extension xAI { + public enum ChatMessageBody: Hashable, Sendable { + + + public struct FunctionCall: Codable, Hashable, Sendable { + public let name: String + public let arguments: String + + public init(name: String, arguments: String) { + self.name = name + self.arguments = arguments + } + } + + public struct FunctionInvocation: Codable, Hashable, Sendable { + public let name: String + public let response: String + + public init(name: String, response: String) { + self.name = name + self.response = response + } + } + + case text(String) + case content([_Content]) + /// The call made to a function provided to the LLM. + case functionCall(FunctionCall) + case toolCalls([ToolCall]) + /// The result of a function call of a function that was provided to the LLM. 
+ case functionInvocation(FunctionInvocation) + } +} + +// MARK: - Initializers + +extension xAI.ChatMessageBody { + public static func content(_ text: String) -> Self { + .text(text) + } +} + +// MARK: - Extensions + +extension xAI.ChatMessageBody { + public var isEmpty: Bool { + switch self { + case .text(let text): + return text.isEmpty + case .content(let content): + return content.isEmpty + case .functionCall: + return false + case .toolCalls(let toolCalls): + return false + case .functionInvocation: + return false + } + } + + var _textValue: String? { + guard case .text(let string) = self else { + return nil + } + + return string + } + + public mutating func append(_ newText: String) throws { + switch self { + case .text(let text): + self = .text(text.appending(contentsOf: newText)) + case .content(let content): + self = .content(content.appending(.text(newText))) + case .functionCall: + throw Never.Reason.illegal + case .toolCalls(let toolCalls): + throw Never.Reason.illegal + case .functionInvocation: + throw Never.Reason.illegal + } + } + + public static func += (lhs: inout Self, rhs: String) throws { + try lhs.append(rhs) + } +} + +// MARK: - Auxiliary + +extension xAI.ChatMessageBody { + enum _ContentType: String, Codable, Hashable, Sendable { + case text = "text" + case imageURL = "image_url" + } + + public enum _Content: Sendable { + public struct ImageURL: Codable, Hashable, Sendable { + /// https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding + public enum ImageDetail: String, Codable, Hashable, Sendable { + case low + case high + case auto + } + + public let url: URL + public let detail: ImageDetail + + public init(url: URL, detail: ImageDetail = .auto) { + self.url = url + self.detail = detail + } + } + + case text(String) + case imageURL(ImageURL) + + public static func imageURL(_ url: URL) -> Self { + Self.imageURL(ImageURL(url: url, detail: .auto)) + } + } +} + +// MARK: - Conformances + +extension 
xAI.ChatMessageBody._Content: Codable { + fileprivate enum CodingKeys: String, CodingKey { + case type + case text + case imageURL = "image_url" + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + print(try JSON(from: decoder).prettyPrintedDescription) + + let contentType = try container.decode(xAI.ChatMessageBody._ContentType.self, forKey: .type) + + switch contentType { + case .text: + self = .text(try container.decode(String.self, forKey: .text)) + case .imageURL: + self = .imageURL(try container.decode(ImageURL.self, forKey: .imageURL)) + } + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + switch self { + case .text(let text): + try container.encode("text", forKey: .type) + try container.encode(text, forKey: .text) + case .imageURL(let imageURL): + try container.encode("image_url", forKey: .type) + try container.encode(imageURL, forKey: .imageURL) + } + } +} + +extension xAI.ChatMessageBody: CustomStringConvertible { + public var description: String { + switch self { + case .text(let text): + return text.description + case .content(let content): + return content.description + case .functionCall(let call): + return "\(call.name)(\(call.arguments))" + case .toolCalls(let calls): + return calls.map { "\($0.function.name)(\($0.function.arguments))" }.joined(separator: ", ") + case .functionInvocation(let invocation): + return "\(invocation.name)(...) 
= \(invocation.response)" + } + } +} + +extension xAI.ChatMessageBody._Content: CustomStringConvertible { + public var description: String { + switch self { + case .text(let text): + return text.description + case .imageURL(let imageURL): + return imageURL.url.description + } + } +} + +extension xAI.ChatMessageBody._Content: Hashable { + public func hash(into hasher: inout Hasher) { + switch self { + case .text(let string): + hasher.combine(string) + case .imageURL(let url): + hasher.combine(url) + } + } + + public static func == (lhs: Self, rhs: Self) -> Bool { + switch (lhs, rhs) { + case let (.text(a), .text(b)): + return a == b + case let (.imageURL(a), .imageURL(b)): + return a == b + default: + return false + } + } +} + diff --git a/Sources/xAI/Intramodular/Models/xAI.ChatRole.swift b/Sources/xAI/Intramodular/Models/xAI.ChatRole.swift new file mode 100644 index 00000000..b7e04857 --- /dev/null +++ b/Sources/xAI/Intramodular/Models/xAI.ChatRole.swift @@ -0,0 +1,41 @@ + +import CorePersistence +import Diagnostics +import LargeLanguageModels +import Swallow + +extension xAI { + public enum ChatRole: String, Codable, Hashable, Sendable { + case system + case user + case assistant + case function + + public init(from role: AbstractLLM.ChatRole) { + switch role { + case .system: + self = .system + case .user: + self = .user + case .assistant: + self = .assistant + case .other(.function): + self = .function + } + } + + public func __conversion() throws -> AbstractLLM.ChatRole { + switch self { + case .system: + return .system + case .user: + return .user + case .assistant: + return .assistant + case .function: + return .other(.function) + } + } + } +} + diff --git a/Sources/xAI/Intramodular/Models/xAI.Tool.swift b/Sources/xAI/Intramodular/Models/xAI.Tool.swift new file mode 100644 index 00000000..148be8e4 --- /dev/null +++ b/Sources/xAI/Intramodular/Models/xAI.Tool.swift @@ -0,0 +1,51 @@ + + +extension xAI { + public enum ToolType: String, CaseIterable, Codable, 
Hashable, Sendable { + /* Currently, only functions are supported as a tool. */ + case function + } + + public struct Tool: Codable, Hashable, Sendable { + public let type: ToolType + public let function: xAI.ChatFunctionDefinition? + + private init( + type: ToolType, + function: xAI.ChatFunctionDefinition? + ) { + self.type = type + self.function = function + + if function != nil { + assert(type == .function) + } + } + + public static func function( + _ function: xAI.ChatFunctionDefinition + ) -> Self { + Self(type: .function, function: function) + } + } + + public struct ToolCall: Codable, Hashable, Sendable { + public let index: Int? + public let id: String? + public let type: ToolType? + public let function: ChatMessageBody.FunctionCall + + public init( + index: Int? = nil, + id: String?, + type: ToolType = .function, + function: ChatMessageBody.FunctionCall + ) { + self.index = index + self.id = id + self.type = type + self.function = function + } + } +} + diff --git a/Sources/xAI/Intramodular/xAI+LLMRequestHandling.swift b/Sources/xAI/Intramodular/xAI+LLMRequestHandling.swift new file mode 100644 index 00000000..e978f81a --- /dev/null +++ b/Sources/xAI/Intramodular/xAI+LLMRequestHandling.swift @@ -0,0 +1,156 @@ + + +import CorePersistence +import LargeLanguageModels +import NetworkKit +import Swallow + +extension xAI.Client: _TaskDependenciesExporting { + public var _exportedTaskDependencies: TaskDependencies { + var result = TaskDependencies() + + result[\.llm] = self + + return result + } +} + +extension xAI.Client: LLMRequestHandling { + public var _availableModels: [ModelIdentifier]? 
{ + xAI.Model.allCases.map({ $0.__conversion() }) + } + + public func complete( + prompt: Prompt, + parameters: Prompt.CompletionParameters + ) async throws -> Prompt.Completion { + let _completion: Any + + switch prompt { + case let prompt as AbstractLLM.TextPrompt: + _completion = try await _complete( + prompt: prompt, + parameters: try cast(parameters) + ) + + case let prompt as AbstractLLM.ChatPrompt: + _completion = try await _complete( + prompt: prompt, + parameters: try cast(parameters) + ) + default: + throw LLMRequestHandlingError.unsupportedPromptType(Prompt.self) + } + + return try cast(_completion) + } + + private func _complete( + prompt: AbstractLLM.TextPrompt, + parameters: AbstractLLM.TextCompletionParameters + ) async throws -> AbstractLLM.TextCompletion { + throw LLMRequestHandlingError.unsupportedPromptType(.init(Swift.type(of: prompt))) + } + + private func _complete( + prompt: AbstractLLM.ChatPrompt, + parameters: AbstractLLM.ChatCompletionParameters + ) async throws -> AbstractLLM.ChatCompletion { + + var messages: [xAI.ChatMessage] = [] + for message in prompt.messages { + let chatMessage = try await xAI.ChatMessage(from: message) + messages.append(chatMessage) + } + + let response: xAI.ChatCompletion = try await run( + \.chatCompletions, + with: .init( + messages: messages, + model: _model(for: prompt, parameters: parameters), + maxTokens: parameters.tokenLimit?.fixedValue, + seed: nil, + stream: false, + temperature: parameters.temperatureOrTopP?.temperature, + topProbabilityMass: parameters.temperatureOrTopP?.topProbabilityMass, + functions: parameters.functions?.map { xAI.ChatFunctionDefinition(from: $0) } + ) + ) + + assert(response.choices.count == 1) + + let message = try AbstractLLM.ChatMessage(from: response, choiceIndex: 0) + + return AbstractLLM.ChatCompletion( + prompt: prompt.messages, + message: message, + stopReason: .init() // FIXME: !!! 
+ ) + } + + private func _model( + for prompt: AbstractLLM.ChatPrompt, + parameters: AbstractLLM.ChatCompletionParameters? + ) throws -> xAI.Model { + try prompt.context.get(\.modelIdentifier)?.as(xAI.Model.self) ?? .grok_beta + } +} + +// MARK: - Auxiliary + +extension AbstractLLM.ChatRole { + public init( + from role: xAI.ChatRole + ) throws { + switch role { + case .system: + self = .system + case .user: + self = .user + case .assistant: + self = .assistant + case .function: + self = .other(.function) + } + } +} + +extension AbstractLLM.ChatMessage { + public init( + from completion: xAI.ChatCompletion, + choiceIndex: Int + ) throws { + let choice = completion.choices[choiceIndex] + + self.init( + id: AnyPersistentIdentifier(erasing: "\(completion.id)_\(choiceIndex.description)"), + role: try AbstractLLM.ChatRole(from: choice.message.role), + content: PromptLiteral(choice.message.body.description) + ) + } +} + +extension xAI.ChatMessage { + public init( + from message: AbstractLLM.ChatMessage + ) throws { + self.init( + role: xAI.ChatRole( + from: message.role + ), + content: try message.content._stripToText() + ) + } +} + +extension xAI.ChatFunctionDefinition { + public init( + from function: AbstractLLM.ChatFunctionDefinition + ) { + self.init( + name: function.name.rawValue, + description: function.context, + parameters: function.parameters + ) + } +} diff --git a/Sources/xAI/Intramodular/xAI.ChatMessage+LargeLanguageModels.swift b/Sources/xAI/Intramodular/xAI.ChatMessage+LargeLanguageModels.swift new file mode 100644 index 00000000..a1937e11 --- /dev/null +++ b/Sources/xAI/Intramodular/xAI.ChatMessage+LargeLanguageModels.swift @@ -0,0 +1,258 @@ + + +import CorePersistence +import FoundationX +@_spi(Internal) import LargeLanguageModels + +extension xAI.ChatMessage: _PromptLiteralEncodingContainer { + public mutating func encode( + _ component: PromptLiteral._Degenerate.Component + ) async throws { + var content: [xAI.ChatMessageBody._Content] + + switch 
self.body { + case .text(let _content): + content = [.text(_content)] + case .content(let _content): + content = _content + case .functionCall(_): + throw Never.Reason.unsupported + case .toolCalls(_): + throw Never.Reason.unsupported + case .functionInvocation(_): + throw Never.Reason.unsupported + } + + switch component.payload { + case .string(let string): + content.append(.text(string)) + case .image(let image): + let imageURL: Base64DataURL = try await image.toBase64DataURL() + + content.append(.imageURL(xAI.ChatMessageBody._Content.ImageURL(url: imageURL.url, detail: .auto))) + case .functionCall: + throw Never.Reason.unsupported + case .resultOfFunctionCall: + throw Never.Reason.unsupported + } + + self = .init( + id: nil, // FIXME: !!! + role: role, + body: .content(content) + ) + } +} + +extension xAI.ChatMessage { + public init( + from message: AbstractLLM.ChatMessage + ) async throws { + let role: xAI.ChatRole + + switch message.role { + case .system: + role = .system + case .user: + role = .user + case .assistant: + role = .assistant + case .other(.function): + role = .function + } + + let _content = try message.content._degenerate() + + if _content.components.contains(where: { $0.payload.type == .functionCall || $0.payload.type == .functionInvocation }) { + switch try _content.components.toCollectionOfOne().value.payload { + case .functionCall(let call): + self.init( + id: nil, + // FIXME: !!! + role: role, + body: .functionCall( + xAI.ChatMessageBody.FunctionCall( + name: call.name.rawValue, + arguments: try call.arguments.__conversion() + ) + ) + ) + case .resultOfFunctionCall(let result): + self.init( + id: nil, // FIXME: !!! + role: role, + body: .functionInvocation( + .init( + name: result.name.rawValue, + response: try result.result.__conversion() as String + ) + ) + ) + default: + assertionFailure("Unsupported prompt literal.") + + throw Never.Reason.illegal + } + } else { + var _temp = Self( + id: nil, // FIXME: !!! 
+ role: role, + body: .content([]) + ) + + try await message.content._encode(to: &_temp) + + self = _temp + } + } +} + +extension AbstractLLM.ChatMessage { + public init( + from message: xAI.ChatMessage + ) throws { + let id = message.id + let role: AbstractLLM.ChatRole + + switch message.role { + case .system: + role = .system + case .user: + role = .user + case .assistant: + role = .assistant + case .function: + role = .other(.function) + } + + switch message.body { + case .text(let content): + self.init( + id: AnyPersistentIdentifier(erasing: id), + role: role, + content: PromptLiteral( + content, + role: .chat(role) + ) + ) + case .content(let content): + self.init( + id: AnyPersistentIdentifier(erasing: id), + role: role, + content: PromptLiteral( + from: content, + role: .chat(role) + ) + ) + case .functionCall(let call): + self.init( + id: AnyPersistentIdentifier(erasing: id), + role: role, + content: try PromptLiteral( + functionCall: .init( + functionID: nil, + name: AbstractLLM.ChatFunction.Name(rawValue: call.name), + arguments: AbstractLLM.ChatFunctionCall.Arguments(unencoded: call.arguments), + context: .init() + ), + role: .chat(role) + ) + ) + case .toolCalls(let calls): + guard let firstCall = calls.first?.function else { + throw DecodingError.dataCorrupted(.init( + codingPath: [], + debugDescription: "Tool calls array is empty" + )) + } + + self.init( + id: AnyPersistentIdentifier(erasing: id), + role: role, + content: try PromptLiteral( + functionCall: .init( + functionID: nil, + name: AbstractLLM.ChatFunction.Name(rawValue: firstCall.name), + arguments: AbstractLLM.ChatFunctionCall.Arguments(unencoded: firstCall.arguments), + context: .init() + ), + role: .chat(role) + ) + ) + case .functionInvocation(let invocation): + self.init( + id: AnyPersistentIdentifier(erasing: id), + role: role, + content: try .init( + functionInvocation: .init( + functionID: nil, + name: AbstractLLM.ChatFunction.Name(rawValue: invocation.name), + result: .init(rawValue: 
invocation.response) + ), + role: .chat(role) + ) + ) + } + } +} + +extension PromptLiteral { + public init(from message: xAI.ChatMessage) throws { + let role: PromptMatterRole + + switch message.role { + case .system: + role = .chat(.system) + case .user: + role = .chat(.user) + case .assistant: + role = .chat(.assistant) + case .function: + role = .chat(.other(.function)) + } + + switch message.body { + case .text(let text): + self.init(from: [.text(text)], role: role) + case .content(let content): + self.init(from: content, role: role) + case .functionCall: + TODO.unimplemented + case .toolCalls(_): + TODO.unimplemented + case .functionInvocation: + TODO.unimplemented + } + } + + init( + from contents: [xAI.ChatMessageBody._Content], + role: PromptMatterRole + ) { + var components: [PromptLiteral.StringInterpolation.Component] = [] + + for content in contents { + switch content { + case .text(let content): + components.append( + PromptLiteral.StringInterpolation.Component( + payload: .stringLiteral(content), + role: role + ) + ) + case .imageURL(let image): + assert(image.detail == .auto) // FIXME + + components.append( + PromptLiteral.StringInterpolation.Component( + payload: .image(.url(image.url)), + role: role + ) + ) + } + } + + self.init(stringInterpolation: .init(components: components)) + } +} + diff --git a/Sources/xAI/Intramodular/xAI.Client.swift b/Sources/xAI/Intramodular/xAI.Client.swift new file mode 100644 index 00000000..eb33832f --- /dev/null +++ b/Sources/xAI/Intramodular/xAI.Client.swift @@ -0,0 +1,50 @@ + + +import CorePersistence +import LargeLanguageModels +import Merge +import NetworkKit +import Swallow + +extension xAI { + @RuntimeDiscoverable + public final class Client: HTTPClient, _StaticSwift.Namespace { + public static var persistentTypeRepresentation: some IdentityRepresentation { + _MIServiceTypeIdentifier._xAI + } + + public let interface: APISpecification + public let session: HTTPSession + + public init(interface: 
APISpecification, session: HTTPSession) { + self.interface = interface + self.session = session + } + + public convenience init(apiKey: String?) { + self.init( + interface: .init(configuration: .init(apiKey: apiKey)), + session: .shared + ) + } + } +} + +extension xAI.Client: _MIService { + public convenience init( + account: (any _MIServiceAccount)? + ) async throws { + let account: any _MIServiceAccount = try account.unwrap() + let serviceIdentifier: _MIServiceTypeIdentifier = account.serviceIdentifier + + guard serviceIdentifier == _MIServiceTypeIdentifier._xAI else { + throw _MIServiceError.serviceTypeIncompatible(serviceIdentifier) + } + + guard let credential = account.credential as? _MIServiceAPIKeyCredential else { + throw _MIServiceError.invalidCredentials(account.credential) + } + + self.init(apiKey: credential.apiKey) + } +} diff --git a/Sources/xAI/Intramodular/xAI.Model.swift b/Sources/xAI/Intramodular/xAI.Model.swift new file mode 100644 index 00000000..bc06398d --- /dev/null +++ b/Sources/xAI/Intramodular/xAI.Model.swift @@ -0,0 +1,52 @@ + + +import CoreMI +import CorePersistence +import LargeLanguageModels +import Swallow + +extension xAI { + public enum Model: String, CaseIterable, Codable, Hashable, Named, Sendable { + case grok_beta = "grok-beta" + case grok_vision_beta = "grok-vision-beta" + + public var name: String { + switch self { + case .grok_beta: + return "Grok Beta" + case .grok_vision_beta: + return "Grok Vision Beta" + } + } + } +} + +// MARK: - Conformances + +extension xAI.Model: CustomStringConvertible { + public var description: String { + rawValue + } +} + +extension xAI.Model: ModelIdentifierRepresentable { + public init(from identifier: ModelIdentifier) throws { + guard identifier.provider == ._xAI, identifier.revision == nil else { + throw Never.Reason.illegal + } + + guard let model = Self(rawValue: identifier.name) else { + throw Never.Reason.unexpected + } + + self = model + } + + public func __conversion() -> 
ModelIdentifier { + ModelIdentifier( + provider: ._xAI, + name: rawValue, + revision: nil + ) + } +} diff --git a/Sources/xAI/Intramodular/xAI.swift b/Sources/xAI/Intramodular/xAI.swift new file mode 100644 index 00000000..24ccd67a --- /dev/null +++ b/Sources/xAI/Intramodular/xAI.swift @@ -0,0 +1,7 @@ + +import Swift + +public enum xAI { + +} + diff --git a/Sources/xAI/module.swift b/Sources/xAI/module.swift new file mode 100644 index 00000000..86462ca6 --- /dev/null +++ b/Sources/xAI/module.swift @@ -0,0 +1,5 @@ + + +@_exported import LargeLanguageModels +@_exported import SwallowMacrosClient + diff --git a/Tests/Mistral/Intramodular/EmbeddingsTests.swift b/Tests/Mistral/Intramodular/EmbeddingsTests.swift index 4aee6269..3d8c2d75 100644 --- a/Tests/Mistral/Intramodular/EmbeddingsTests.swift +++ b/Tests/Mistral/Intramodular/EmbeddingsTests.swift @@ -3,7 +3,6 @@ // import LargeLanguageModels -import Groq import XCTest import Mistral diff --git a/Tests/Perplexity/Intramodular/CompletionTests.swift b/Tests/Perplexity/Intramodular/CompletionTests.swift index 2f07169b..1decb677 100644 --- a/Tests/Perplexity/Intramodular/CompletionTests.swift +++ b/Tests/Perplexity/Intramodular/CompletionTests.swift @@ -55,8 +55,9 @@ final class CompletionTests: XCTestCase { AbstractLLM.ChatMessage( role: .user, body: "Sup?" 
- ) + ), ] + // AbstractLLM.ChatCompletionStream let result: String = try await llm.complete( messages, diff --git a/Tests/TogetherAI/Intramodular/CompletionTests.swift b/Tests/TogetherAI/Intramodular/CompletionTests.swift new file mode 100644 index 00000000..0cb98147 --- /dev/null +++ b/Tests/TogetherAI/Intramodular/CompletionTests.swift @@ -0,0 +1,67 @@ +// +// Copyright (c) Vatsal Manot +// + +import Foundation +import LargeLanguageModels +import TogetherAI +import XCTest + +final class CompletionTests: XCTestCase { + + func testCompletionsLLMRequest() async throws { + let llm: any LLMRequestHandling = client + + let prompt: AbstractLLM.TextPrompt = .init(stringLiteral: "List all of the states in the USA and their capitals in a table.") + let parameters: AbstractLLM.TextCompletionParameters = .init( + tokenLimit: .fixed(400), + temperature: 0.8, + stops: nil + ) + + let completion = try await llm.complete( + prompt: prompt, + parameters: parameters + ) + + print(completion) + } + + func testCompletionsLlama() async throws { + let completion = try await client + .createCompletion( + for: .llama2_70B, + prompt: "List all of the states in the USA and their capitals in a table.", + maxTokens: 200, + temperature: 0.7, + choices: 5 + ) + print(completion) + } + + func testCompletionsMistral() async throws { + let completion = try await client + .createCompletion( + for: .mistral7b, + prompt: "List all of the states in the USA and their capitals in a table.", + maxTokens: 400, + temperature: 0.5, + choices: 2 + ) + print(completion) + } + + func testCompletionsMixtral() async throws { + let completion = try await client + .createCompletion( + for: .mixtral8x7b, + prompt: "List all of the states in the USA and their capitals in a table.", + maxTokens: 175, + temperature: 0.9, + choices: 3 + ) + print(completion) + } +} + + diff --git a/Tests/TogetherAI/Intramodular/EmbeddingsTests.swift b/Tests/TogetherAI/Intramodular/EmbeddingsTests.swift index a9de0f2b..ea5a3b69 ---
a/Tests/TogetherAI/Intramodular/EmbeddingsTests.swift +++ b/Tests/TogetherAI/Intramodular/EmbeddingsTests.swift @@ -3,11 +3,31 @@ // import LargeLanguageModels -import VoyageAI +import TogetherAI import XCTest final class EmbeddingsTests: XCTestCase { + func testTextEmbeddingsRequestHandling() async { + let textEmbeddingsClient: any TextEmbeddingsRequestHandling = client + let textInput = "Our solar system orbits the Milky Way galaxy at about 515,000 mph" + + do { + let embeddings = try await textEmbeddingsClient.fulfill( + .init( + input: [textInput], + model: ModelIdentifier( + from: TogetherAI.Model.Embedding.togetherM2Bert80M8KRetrieval + ) + ) + ) + let embeddingsData = embeddings.data + XCTAssertTrue(!embeddingsData.isEmpty) + } catch { + XCTFail(String(describing: error)) + } + } + func testTextEmbeddings() async { let textInput = "Our solar system orbits the Milky Way galaxy at about 515,000 mph" do { diff --git a/Tests/TogetherAI/module.swift b/Tests/TogetherAI/module.swift index 67d8ba23..e856c439 100644 --- a/Tests/TogetherAI/module.swift +++ b/Tests/TogetherAI/module.swift @@ -5,7 +5,7 @@ import TogetherAI public var TOGETHERAI_API_KEY: String { - "" + "YOUR_API_KEY" } public var client: TogetherAI.Client { diff --git a/Tests/xAI/Intramodular/CompletionTests.swift b/Tests/xAI/Intramodular/CompletionTests.swift new file mode 100644 index 00000000..ef9e78b9 --- /dev/null +++ b/Tests/xAI/Intramodular/CompletionTests.swift @@ -0,0 +1,43 @@ + + +import LargeLanguageModels +import xAI +import XCTest + +final class CompletionTests: XCTestCase { + + let llm: any LLMRequestHandling = client + + func testChatCompletionsGrokBeta() async throws { + let result = try await resultForModel(xAI.Model.grok_beta) + print(result) // "Hey! What's up with you?" + } + + func testChatCompletionsGrokVisionBeta() async throws { + let result = try await resultForModel(xAI.Model.grok_vision_beta) + print(result) // "Hey! How can I help you today?"
+ } + + private func resultForModel(_ model: xAI.Model) async throws -> String { + + let messages: [AbstractLLM.ChatMessage] = [ + AbstractLLM.ChatMessage( + role: .system, + body: "You are an extremely intelligent assistant." + ), + AbstractLLM.ChatMessage( + role: .user, + body: "Sup?" + ) + ] + + let result: String = try await llm.complete( + messages, + model: model, + as: String.self + ) + + return result + } +} + diff --git a/Tests/xAI/Intramodular/FunctionCallingTests.swift b/Tests/xAI/Intramodular/FunctionCallingTests.swift new file mode 100644 index 00000000..ba6af3cb --- /dev/null +++ b/Tests/xAI/Intramodular/FunctionCallingTests.swift @@ -0,0 +1,103 @@ + + +import CorePersistence +import xAI +import XCTest + +final class FunctionCallingTests: XCTestCase { + let llm: any LLMRequestHandling = client + + func testFunctionCalling() async throws { + let messages: [AbstractLLM.ChatMessage] = [ + .system { + "You are a Metereologist Expert accurately giving weather data in fahrenheit at any given city around the world" + }, + .user { + "What is the weather in San Francisco, CA?" + } + ] + + let functionCall1: AbstractLLM.ChatFunctionCall = try await llm.complete( + messages, + functions: [makeGetWeatherFunction1()], + as: .functionCall + ) + + let functionCall2: AbstractLLM.ChatFunctionCall = try await llm.complete( + messages, + functions: [makeGetWeatherFunction2()], + as: .functionCall + ) + + let result1 = try functionCall1.decode(GetWeatherParameters.self) + let result2 = try functionCall2.decode(GetWeatherParameters.self) + + print(result1, result2) + } + + private func makeGetWeatherFunction1() -> AbstractLLM.ChatFunctionDefinition { + let weatherObjectSchema = JSONSchema( + type: .object, + description: "Weather in a certain location", + properties: [ + "location": JSONSchema( + type: .string, + description: "The city and state, e.g. 
San Francisco, CA" + ), + "unit_fahrenheit" : JSONSchema( + type: .number, + description: "The unit of temperature in 'fahrenheit'" + ) + ], + required: true + ) + + let getWeatherFunction: AbstractLLM.ChatFunctionDefinition = AbstractLLM.ChatFunctionDefinition( + name: "get_weather", + context: "Get the current weather in a given location", + parameters: JSONSchema( + type: .object, + description: "Weather data for a given location in fahrenheit", + properties: [ + "weather": .array(weatherObjectSchema) + ] + ) + ) + + return getWeatherFunction + } + + struct GetWeatherParameters: Codable, Hashable, Sendable { + let weather: [WeatherObject] + } + + struct WeatherObject: Codable, Hashable, Sendable { + let location: String + let unit_fahrenheit: Double? + } + + private func makeGetWeatherFunction2() throws -> AbstractLLM.ChatFunctionDefinition { + let getWeatherFunction: AbstractLLM.ChatFunctionDefinition = AbstractLLM.ChatFunctionDefinition( + name: "get_weather", + context: "Get the current weather in a given location", + parameters: JSONSchema( + type: .object, + description: "Weather data for a given location in fahrenheit", + properties: [ + "weather": try .array { + try JSONSchema( + type: WeatherObject.self, + description: "Weather in a certain location", + propertyDescriptions: [ + "location": "The city and state, e.g. San Francisco, CA", + "unit_fahrenheit": "The unit of temperature in 'fahrenheit'" + ] + ) + } + ] + ) + ) + + return getWeatherFunction + } +} diff --git a/Tests/xAI/module.swift b/Tests/xAI/module.swift new file mode 100644 index 00000000..bd88733f --- /dev/null +++ b/Tests/xAI/module.swift @@ -0,0 +1,12 @@ + + +import xAI + +public var xAI_API_KEY: String { + "YOUR_API_KEY" +} + +public var client: xAI.Client { + xAI.Client(apiKey: xAI_API_KEY) +} +