Commit 4b4a584

feat: raw response accumulation for streaming
Parent: facc630

13 files changed (+211, -139 lines)

framework/changelog.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+feat: support raw response accumulation in stream accumulator

framework/streaming/accumulator.go

Lines changed: 4 additions & 0 deletions
@@ -43,6 +43,7 @@ func (a *Accumulator) putChatStreamChunk(chunk *ChatStreamChunk) {
 	chunk.ErrorDetails = nil
 	chunk.FinishReason = nil
 	chunk.TokenUsage = nil
+	chunk.RawResponse = nil
 	a.chatStreamChunkPool.Put(chunk)
 }

@@ -60,6 +61,7 @@ func (a *Accumulator) putAudioStreamChunk(chunk *AudioStreamChunk) {
 	chunk.ErrorDetails = nil
 	chunk.FinishReason = nil
 	chunk.TokenUsage = nil
+	chunk.RawResponse = nil
 	a.audioStreamChunkPool.Put(chunk)
 }

@@ -77,6 +79,7 @@ func (a *Accumulator) putTranscriptionStreamChunk(chunk *TranscriptionStreamChun
 	chunk.ErrorDetails = nil
 	chunk.FinishReason = nil
 	chunk.TokenUsage = nil
+	chunk.RawResponse = nil
 	a.transcriptionStreamChunkPool.Put(chunk)
 }

@@ -94,6 +97,7 @@ func (a *Accumulator) putResponsesStreamChunk(chunk *ResponsesStreamChunk) {
 	chunk.ErrorDetails = nil
 	chunk.FinishReason = nil
 	chunk.TokenUsage = nil
+	chunk.RawResponse = nil
 	a.responsesStreamChunkPool.Put(chunk)
 }
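
Each put helper resets every pointer field, including the new RawResponse, before returning the chunk to its pool; otherwise a recycled chunk could surface the previous request's raw payload. A minimal sketch of that reset-before-Put pattern, using a simplified chunk type rather than the actual Bifrost structs:

package main

import (
	"fmt"
	"sync"
)

// chunk is a stand-in for the framework's stream chunk structs.
type chunk struct {
	FinishReason *string
	RawResponse  *string
}

var chunkPool = sync.Pool{New: func() any { return &chunk{} }}

// putChunk clears all pointer fields so the next Get never sees stale data.
func putChunk(c *chunk) {
	c.FinishReason = nil
	c.RawResponse = nil
	chunkPool.Put(c)
}

func main() {
	raw := `{"delta":"hi"}`
	c := chunkPool.Get().(*chunk)
	c.RawResponse = &raw
	putChunk(c)

	reused := chunkPool.Get().(*chunk)
	fmt.Println(reused.RawResponse == nil) // true: nothing leaks across requests
}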

framework/streaming/audio.go

Lines changed: 19 additions & 0 deletions
@@ -91,6 +91,22 @@ func (a *Accumulator) processAccumulatedAudioStreamingChunks(requestID string, b
 			data.CacheDebug = lastChunk.SemanticCacheDebug
 		}
 	}
+	// Accumulate raw response
+	if len(accumulator.AudioStreamChunks) > 0 {
+		// Sort chunks by chunk index
+		sort.Slice(accumulator.AudioStreamChunks, func(i, j int) bool {
+			return accumulator.AudioStreamChunks[i].ChunkIndex < accumulator.AudioStreamChunks[j].ChunkIndex
+		})
+		for _, chunk := range accumulator.AudioStreamChunks {
+			if chunk.RawResponse != nil {
+				if data.RawResponse == nil {
+					data.RawResponse = bifrost.Ptr(*chunk.RawResponse + "\n\n")
+				} else {
+					*data.RawResponse += *chunk.RawResponse + "\n\n"
+				}
+			}
+		}
+	}
 	return data, nil
 }

@@ -118,6 +134,9 @@ func (a *Accumulator) processAudioStreamingResponse(ctx *schemas.BifrostContext,
 			Audio: result.SpeechStreamResponse.Audio,
 		}
 		chunk.Delta = newDelta
+		if result.SpeechStreamResponse.ExtraFields.RawResponse != nil {
+			chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.SpeechStreamResponse.ExtraFields.RawResponse))
+		}
 		if result.SpeechStreamResponse.Usage != nil {
			chunk.TokenUsage = result.SpeechStreamResponse.Usage
 		}
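
Each per-modality accumulator (audio, chat, transcription, responses) applies the same join step: sort the retained chunks by ChunkIndex, then concatenate their RawResponse payloads, each followed by a blank line. A hedged, self-contained sketch of that step with simplified types (not the framework's chunk structs):

package main

import (
	"fmt"
	"sort"
)

type rawChunk struct {
	ChunkIndex  int
	RawResponse *string
}

// joinRawResponses concatenates non-nil raw payloads in chunk-index order,
// appending "\n\n" after each entry, mirroring the accumulator logic above.
func joinRawResponses(chunks []rawChunk) *string {
	sort.Slice(chunks, func(i, j int) bool { return chunks[i].ChunkIndex < chunks[j].ChunkIndex })
	var out *string
	for _, c := range chunks {
		if c.RawResponse == nil {
			continue
		}
		if out == nil {
			s := *c.RawResponse + "\n\n"
			out = &s
		} else {
			*out += *c.RawResponse + "\n\n"
		}
	}
	return out
}

func main() {
	a, b := `{"id":1}`, `{"id":2}`
	chunks := []rawChunk{{ChunkIndex: 1, RawResponse: &b}, {ChunkIndex: 0, RawResponse: &a}}
	fmt.Println(*joinRawResponses(chunks)) // chunk 0 first, each payload followed by a blank line
}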

framework/streaming/chat.go

Lines changed: 19 additions & 0 deletions
@@ -179,6 +179,22 @@ func (a *Accumulator) processAccumulatedChatStreamingChunks(requestID string, re
 		}
 		data.FinishReason = lastChunk.FinishReason
 	}
+	// Accumulate raw response
+	if len(accumulator.ChatStreamChunks) > 0 {
+		// Sort chunks by chunk index
+		sort.Slice(accumulator.ChatStreamChunks, func(i, j int) bool {
+			return accumulator.ChatStreamChunks[i].ChunkIndex < accumulator.ChatStreamChunks[j].ChunkIndex
+		})
+		for _, chunk := range accumulator.ChatStreamChunks {
+			if chunk.RawResponse != nil {
+				if data.RawResponse == nil {
+					data.RawResponse = bifrost.Ptr(*chunk.RawResponse + "\n\n")
+				} else {
+					*data.RawResponse += *chunk.RawResponse + "\n\n"
+				}
+			}
+		}
+	}
 	return data, nil
 }

@@ -227,6 +243,9 @@ func (a *Accumulator) processChatStreamingResponse(ctx *schemas.BifrostContext,
 		chunk.TokenUsage = result.ChatResponse.Usage
 	}
 	chunk.ChunkIndex = result.ChatResponse.ExtraFields.ChunkIndex
+	if result.ChatResponse.ExtraFields.RawResponse != nil {
+		chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.ChatResponse.ExtraFields.RawResponse))
+	}
 	if isFinalChunk {
 		if a.pricingManager != nil {
 			cost := a.pricingManager.CalculateCostWithCacheDebug(result)
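
On the per-chunk side, ExtraFields.RawResponse is an untyped value, so the chat, audio, and transcription paths capture it with fmt.Sprintf("%v", ...), while the OpenAI-compatible branch in responses.go below uses a plain string type assertion instead. A rough sketch of the two capture strategies; the helper names and field layout here are illustrative, not Bifrost APIs:

package main

import "fmt"

// captureSprintf stringifies whatever the provider attached, even maps or structs.
func captureSprintf(raw any) *string {
	if raw == nil {
		return nil
	}
	s := fmt.Sprintf("%v", raw)
	return &s
}

// captureString keeps the payload only when it is already a string,
// avoiding Go's map/struct formatting in the stored value.
func captureString(raw any) *string {
	if s, ok := raw.(string); ok {
		return &s
	}
	return nil
}

func main() {
	fmt.Println(*captureSprintf(map[string]int{"tokens": 3})) // map[tokens:3]
	fmt.Println(captureString(map[string]int{"tokens": 3}))   // <nil>
	fmt.Println(*captureString(`{"id":"evt_1"}`))             // {"id":"evt_1"}
}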

framework/streaming/responses.go

Lines changed: 97 additions & 37 deletions
@@ -660,6 +660,23 @@ func (a *Accumulator) processAccumulatedResponsesStreamingChunks(requestID strin
 		data.FinishReason = lastChunk.FinishReason
 	}

+	// Accumulate raw response
+	if len(accumulator.ResponsesStreamChunks) > 0 {
+		// Sort chunks by chunk index
+		sort.Slice(accumulator.ResponsesStreamChunks, func(i, j int) bool {
+			return accumulator.ResponsesStreamChunks[i].ChunkIndex < accumulator.ResponsesStreamChunks[j].ChunkIndex
+		})
+		for _, chunk := range accumulator.ResponsesStreamChunks {
+			if chunk.RawResponse != nil {
+				if data.RawResponse == nil {
+					data.RawResponse = bifrost.Ptr(*chunk.RawResponse + "\n\n")
+				} else {
+					*data.RawResponse += *chunk.RawResponse + "\n\n"
+				}
+			}
+		}
+	}
+
 	return data, nil
 }

@@ -683,54 +700,94 @@ func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostCont

 	// For OpenAI-compatible providers, the last chunk already contains the whole accumulated response
 	// so just return it as is
+	// We maintain the accumulator only for raw response accumulation
 	if provider == schemas.OpenAI || provider == schemas.OpenRouter || (provider == schemas.Azure && !schemas.IsAnthropicModel(model)) {
 		isFinalChunk := bifrost.IsFinalChunk(ctx)
+		chunk := a.getResponsesStreamChunk()
+		chunk.Timestamp = time.Now()
+		chunk.ErrorDetails = bifrostErr
+		if bifrostErr != nil {
+			chunk.FinishReason = bifrost.Ptr("error")
+		} else if result != nil && result.ResponsesStreamResponse != nil {
+			if result.ResponsesStreamResponse.ExtraFields.RawResponse != nil {
+				rawResponse, ok := result.ResponsesStreamResponse.ExtraFields.RawResponse.(string)
+				if ok {
+					chunk.RawResponse = bifrost.Ptr(rawResponse)
+				}
+			}
+		}
+		if addErr := a.addResponsesStreamChunk(requestID, chunk, isFinalChunk); addErr != nil {
+			return nil, fmt.Errorf("failed to add responses stream chunk for request %s: %w", requestID, addErr)
+		}
 		if isFinalChunk {
-			// For OpenAI, the final chunk contains the complete response
-			// Extract the complete response and return it
-			if result != nil && result.ResponsesStreamResponse != nil {
-				// Build the complete response from the final chunk
-				data := &AccumulatedData{
-					RequestID:      requestID,
-					Status:         "success",
-					Stream:         true,
-					StartTimestamp: startTimestamp,
-					EndTimestamp:   endTimestamp,
-					Latency:        result.GetExtraFields().Latency,
-					ErrorDetails:   bifrostErr,
+			shouldProcess := false
+			// Get the accumulator to check if processing has already been triggered
+			accumulator := a.getOrCreateStreamAccumulator(requestID)
+			accumulator.mu.Lock()
+			shouldProcess = !accumulator.IsComplete
+			// Mark as complete when we're about to process
+			if shouldProcess {
+				accumulator.IsComplete = true
+			}
+			accumulator.mu.Unlock()
+
+			if shouldProcess {
+				accumulatedData, processErr := a.processAccumulatedResponsesStreamingChunks(requestID, bifrostErr, isFinalChunk)
+				if processErr != nil {
+					a.logger.Error("failed to process accumulated responses chunks for request %s: %v", requestID, processErr)
+					return nil, processErr
 				}

-				if bifrostErr != nil {
-					data.Status = "error"
-				}
+				// For OpenAI, the final chunk contains the complete response
+				// Extract the complete response and return it
+				if result != nil && result.ResponsesStreamResponse != nil {
+					// Build the complete response from the final chunk
+					data := &AccumulatedData{
+						RequestID:      requestID,
+						Status:         "success",
+						Stream:         true,
+						StartTimestamp: startTimestamp,
+						EndTimestamp:   endTimestamp,
+						Latency:        result.GetExtraFields().Latency,
+						ErrorDetails:   bifrostErr,
+						RawResponse:    accumulatedData.RawResponse,
+					}

-				// Extract the complete response from the stream response
-				if result.ResponsesStreamResponse.Response != nil {
-					data.OutputMessages = result.ResponsesStreamResponse.Response.Output
-					if result.ResponsesStreamResponse.Response.Usage != nil {
-						// Convert ResponsesResponseUsage to schemas.LLMUsage
-						data.TokenUsage = &schemas.BifrostLLMUsage{
-							PromptTokens:     result.ResponsesStreamResponse.Response.Usage.InputTokens,
-							CompletionTokens: result.ResponsesStreamResponse.Response.Usage.OutputTokens,
-							TotalTokens:      result.ResponsesStreamResponse.Response.Usage.TotalTokens,
+					if bifrostErr != nil {
+						data.Status = "error"
+					}
+
+					// Extract the complete response from the stream response
+					if result.ResponsesStreamResponse.Response != nil {
+						data.OutputMessages = result.ResponsesStreamResponse.Response.Output
+						if result.ResponsesStreamResponse.Response.Usage != nil {
+							// Convert ResponsesResponseUsage to schemas.LLMUsage
+							data.TokenUsage = &schemas.BifrostLLMUsage{
+								PromptTokens:     result.ResponsesStreamResponse.Response.Usage.InputTokens,
+								CompletionTokens: result.ResponsesStreamResponse.Response.Usage.OutputTokens,
+								TotalTokens:      result.ResponsesStreamResponse.Response.Usage.TotalTokens,
+							}
 						}
 					}
-				}
-			}

-				if a.pricingManager != nil {
-					cost := a.pricingManager.CalculateCostWithCacheDebug(result)
-					data.Cost = bifrost.Ptr(cost)
-				}
+					if a.pricingManager != nil {
+						cost := a.pricingManager.CalculateCostWithCacheDebug(result)
+						data.Cost = bifrost.Ptr(cost)
+					}

-				return &ProcessedStreamResponse{
-					Type:       StreamResponseTypeFinal,
-					RequestID:  requestID,
-					StreamType: StreamTypeResponses,
-					Provider:   provider,
-					Model:      model,
-					Data:       data,
-				}, nil
+					return &ProcessedStreamResponse{
+						Type:       StreamResponseTypeFinal,
+						RequestID:  requestID,
+						StreamType: StreamTypeResponses,
+						Provider:   provider,
+						Model:      model,
+						Data:       data,
+					}, nil
+				} else {
+					return nil, nil
+				}
 			}
+			return nil, nil
 		}

 	// For non-final chunks from OpenAI, just pass through

@@ -753,6 +810,9 @@ func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostCont
 	if bifrostErr != nil {
 		chunk.FinishReason = bifrost.Ptr("error")
 	} else if result != nil && result.ResponsesStreamResponse != nil {
+		if result.ResponsesStreamResponse.ExtraFields.RawResponse != nil {
+			chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.ResponsesStreamResponse.ExtraFields.RawResponse))
+		}
 		// Store a deep copy of the stream response to prevent shared data mutation between plugins
 		chunk.StreamResponse = deepCopyResponsesStreamResponse(result.ResponsesStreamResponse)
 		// Extract token usage from stream response if available
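
The restructured OpenAI-compatible branch also adds a process-once guard: the final chunk flips IsComplete under the accumulator's mutex, and only the caller that performed the flip goes on to build the final response. A hedged sketch of that check-and-set pattern with a simplified accumulator (not the framework's type):

package main

import (
	"fmt"
	"sync"
)

// streamAccumulator is a stand-in for the framework's per-request accumulator.
type streamAccumulator struct {
	mu         sync.Mutex
	IsComplete bool
}

// tryComplete returns true for exactly one caller; later callers see false
// and skip the final processing step.
func (a *streamAccumulator) tryComplete() bool {
	a.mu.Lock()
	defer a.mu.Unlock()
	if a.IsComplete {
		return false
	}
	a.IsComplete = true
	return true
}

func main() {
	acc := &streamAccumulator{}
	fmt.Println(acc.tryComplete()) // true: this caller processes the accumulated chunks
	fmt.Println(acc.tryComplete()) // false: duplicate final-chunk notifications are ignored
}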

framework/streaming/transcription.go

Lines changed: 28 additions & 8 deletions
@@ -103,6 +103,22 @@ func (a *Accumulator) processAccumulatedTranscriptionStreamingChunks(requestID s
 			data.CacheDebug = lastChunk.SemanticCacheDebug
 		}
 	}
+	// Accumulate raw response
+	if len(accumulator.TranscriptionStreamChunks) > 0 {
+		// Sort chunks by chunk index
+		sort.Slice(accumulator.TranscriptionStreamChunks, func(i, j int) bool {
+			return accumulator.TranscriptionStreamChunks[i].ChunkIndex < accumulator.TranscriptionStreamChunks[j].ChunkIndex
+		})
+		for _, chunk := range accumulator.TranscriptionStreamChunks {
+			if chunk.RawResponse != nil {
+				if data.RawResponse == nil {
+					data.RawResponse = bifrost.Ptr(*chunk.RawResponse + "\n\n")
+				} else {
+					*data.RawResponse += *chunk.RawResponse + "\n\n"
+				}
+			}
+		}
+	}
 	return data, nil
 }

@@ -123,18 +139,22 @@ func (a *Accumulator) processTranscriptionStreamingResponse(ctx *schemas.Bifrost
 	if bifrostErr != nil {
 		chunk.FinishReason = bifrost.Ptr("error")
 	} else if result != nil && result.TranscriptionStreamResponse != nil {
+		// Set delta for all chunks (not just final chunks with usage)
+		// We create a deep copy of the delta to avoid pointing to stack memory
+		newDelta := &schemas.BifrostTranscriptionStreamResponse{
+			Type:  result.TranscriptionStreamResponse.Type,
+			Delta: result.TranscriptionStreamResponse.Delta,
+		}
+		chunk.Delta = newDelta
+
+		// Set token usage if available (typically only in final chunk)
 		if result.TranscriptionStreamResponse.Usage != nil {
 			chunk.TokenUsage = result.TranscriptionStreamResponse.Usage
-
-			// For Transcription, entire delta is sent in the final chunk which also has usage information
-			// We create a deep copy of the delta to avoid pointing to stack memory
-			newDelta := &schemas.BifrostTranscriptionStreamResponse{
-				Type:  result.TranscriptionStreamResponse.Type,
-				Delta: result.TranscriptionStreamResponse.Delta,
-			}
-			chunk.Delta = newDelta
 		}
 		chunk.ChunkIndex = result.TranscriptionStreamResponse.ExtraFields.ChunkIndex
+		if result.TranscriptionStreamResponse.ExtraFields.RawResponse != nil {
+			chunk.RawResponse = bifrost.Ptr(fmt.Sprintf("%v", result.TranscriptionStreamResponse.ExtraFields.RawResponse))
+		}
 		if isFinalChunk {
 			if a.pricingManager != nil {
 				cost := a.pricingManager.CalculateCostWithCacheDebug(result)
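
The transcription hunk moves the delta copy out of the usage-only branch, so every chunk now stores a fresh copy of the incoming delta, not only the final one. The copy matters because the stored chunk outlives the callback; a rough sketch of the aliasing issue it avoids, with simplified types rather than the Bifrost schema:

package main

import "fmt"

// delta is a stand-in for the transcription stream response payload.
type delta struct {
	Type string
	Text string
}

type storedChunk struct {
	Delta *delta
}

// capture copies the incoming delta into a new allocation before the chunk is
// retained, so later mutation of the caller's struct cannot change what was stored.
func capture(incoming *delta) storedChunk {
	copied := &delta{Type: incoming.Type, Text: incoming.Text}
	return storedChunk{Delta: copied}
}

func main() {
	in := &delta{Type: "transcript.text.delta", Text: "hel"}
	chunk := capture(in)

	in.Text = "lo"                // provider reuses its event struct for the next chunk
	fmt.Println(chunk.Delta.Text) // still "hel"
}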

framework/streaming/types.go

Lines changed: 5 additions & 0 deletions
@@ -43,6 +43,7 @@ type AccumulatedData struct {
 	AudioOutput         *schemas.BifrostSpeechResponse
 	TranscriptionOutput *schemas.BifrostTranscriptionResponse
 	FinishReason        *string
+	RawResponse         *string
 }

 // AudioStreamChunk represents a single streaming chunk
@@ -55,6 +56,7 @@ type AudioStreamChunk struct {
 	Cost         *float64              // Cost in dollars from pricing plugin
 	ErrorDetails *schemas.BifrostError // Error if any
 	ChunkIndex   int                   // Index of the chunk in the stream
+	RawResponse  *string
 }

 // TranscriptionStreamChunk represents a single transcription streaming chunk
@@ -67,6 +69,7 @@ type TranscriptionStreamChunk struct {
 	Cost         *float64              // Cost in dollars from pricing plugin
 	ErrorDetails *schemas.BifrostError // Error if any
 	ChunkIndex   int                   // Index of the chunk in the stream
+	RawResponse  *string
 }

 // ChatStreamChunk represents a single streaming chunk
@@ -79,6 +82,7 @@ type ChatStreamChunk struct {
 	Cost         *float64              // Cost in dollars from pricing plugin
 	ErrorDetails *schemas.BifrostError // Error if any
 	ChunkIndex   int                   // Index of the chunk in the stream
+	RawResponse  *string // Raw response if available
 }

 // ResponsesStreamChunk represents a single responses streaming chunk
@@ -91,6 +95,7 @@ type ResponsesStreamChunk struct {
 	Cost         *float64              // Cost in dollars from pricing plugin
 	ErrorDetails *schemas.BifrostError // Error if any
 	ChunkIndex   int                   // Index of the chunk in the stream
+	RawResponse  *string
 }

 // StreamAccumulator manages accumulation of streaming chunks
plugins/logging/operations.go

Lines changed: 4 additions & 0 deletions
@@ -293,6 +293,10 @@ func (p *LoggerPlugin) updateStreamingLogEntry(
 				updates["responses_output"] = tempEntry.ResponsesOutput
 			}
 		}
+		// Handle raw response from stream updates
+		if streamResponse.Data.RawResponse != nil {
+			updates["raw_response"] = *streamResponse.Data.RawResponse
+		}
 	}
 	// Only perform update if there's something to update
 	if len(updates) > 0 {
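
On the logging side, the accumulated value is written only when present, since AccumulatedData.RawResponse is a *string and nil means no raw payload was captured. A small sketch of that nil-guarded update; the map keys here mirror the diff but the surrounding storage schema is illustrative:

package main

import "fmt"

type accumulatedData struct {
	RawResponse *string
}

// buildUpdates mirrors the guard above: dereference the pointer only when it is set.
func buildUpdates(data accumulatedData) map[string]any {
	updates := map[string]any{}
	if data.RawResponse != nil {
		updates["raw_response"] = *data.RawResponse
	}
	return updates
}

func main() {
	raw := `data: {"object":"chat.completion.chunk"}`
	fmt.Println(buildUpdates(accumulatedData{RawResponse: &raw})) // one key to persist
	fmt.Println(buildUpdates(accumulatedData{}))                  // nothing to persist
}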

transports/changelog.md

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,5 @@
 fix: vertex and bedrock usage aggregation improvements for streaming
 fix: choice index fixed to 0 for anthropic and bedrock streaming
 feat: model field added to responses api response
-feat: check allowed models and deployments of key for list models
+feat: check allowed models and deployments of key for list models
+feat: support for raw response accumulation for streaming
