@@ -660,6 +660,23 @@ func (a *Accumulator) processAccumulatedResponsesStreamingChunks(requestID strin
660660 data .FinishReason = lastChunk .FinishReason
661661 }
662662
663+ // Accumulate raw response
664+ if len (accumulator .ResponsesStreamChunks ) > 0 {
665+ // Sort chunks by chunk index
666+ sort .Slice (accumulator .ResponsesStreamChunks , func (i , j int ) bool {
667+ return accumulator .ResponsesStreamChunks [i ].ChunkIndex < accumulator .ResponsesStreamChunks [j ].ChunkIndex
668+ })
669+ for _ , chunk := range accumulator .ResponsesStreamChunks {
670+ if chunk .RawResponse != nil {
671+ if data .RawResponse == nil {
672+ data .RawResponse = bifrost .Ptr (* chunk .RawResponse + "\n \n " )
673+ } else {
674+ * data .RawResponse += * chunk .RawResponse + "\n \n "
675+ }
676+ }
677+ }
678+ }
679+
663680 return data , nil
664681}
665682
@@ -683,54 +700,94 @@ func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostCont
683700
684701 // For OpenAI-compatible providers, the last chunk already contains the whole accumulated response
685702 // so just return it as is
703+ // We maintain the accumulator only for raw response accumulation
686704 if provider == schemas .OpenAI || provider == schemas .OpenRouter || (provider == schemas .Azure && ! schemas .IsAnthropicModel (model )) {
687705 isFinalChunk := bifrost .IsFinalChunk (ctx )
706+ chunk := a .getResponsesStreamChunk ()
707+ chunk .Timestamp = time .Now ()
708+ chunk .ErrorDetails = bifrostErr
709+ if bifrostErr != nil {
710+ chunk .FinishReason = bifrost .Ptr ("error" )
711+ } else if result != nil && result .ResponsesStreamResponse != nil {
712+ if result .ResponsesStreamResponse .ExtraFields .RawResponse != nil {
713+ rawResponse , ok := result .ResponsesStreamResponse .ExtraFields .RawResponse .(string )
714+ if ok {
715+ chunk .RawResponse = bifrost .Ptr (rawResponse )
716+ }
717+ }
718+ }
719+ if addErr := a .addResponsesStreamChunk (requestID , chunk , isFinalChunk ); addErr != nil {
720+ return nil , fmt .Errorf ("failed to add responses stream chunk for request %s: %w" , requestID , addErr )
721+ }
688722 if isFinalChunk {
689- // For OpenAI, the final chunk contains the complete response
690- // Extract the complete response and return it
691- if result != nil && result .ResponsesStreamResponse != nil {
692- // Build the complete response from the final chunk
693- data := & AccumulatedData {
694- RequestID : requestID ,
695- Status : "success" ,
696- Stream : true ,
697- StartTimestamp : startTimestamp ,
698- EndTimestamp : endTimestamp ,
699- Latency : result .GetExtraFields ().Latency ,
700- ErrorDetails : bifrostErr ,
723+ shouldProcess := false
724+ // Get the accumulator to check if processing has already been triggered
725+ accumulator := a .getOrCreateStreamAccumulator (requestID )
726+ accumulator .mu .Lock ()
727+ shouldProcess = ! accumulator .IsComplete
728+ // Mark as complete when we're about to process
729+ if shouldProcess {
730+ accumulator .IsComplete = true
731+ }
732+ accumulator .mu .Unlock ()
733+
734+ if shouldProcess {
735+ accumulatedData , processErr := a .processAccumulatedResponsesStreamingChunks (requestID , bifrostErr , isFinalChunk )
736+ if processErr != nil {
737+ a .logger .Error ("failed to process accumulated responses chunks for request %s: %v" , requestID , processErr )
738+ return nil , processErr
701739 }
702740
703- if bifrostErr != nil {
704- data .Status = "error"
705- }
741+ // For OpenAI, the final chunk contains the complete response
742+ // Extract the complete response and return it
743+ if result != nil && result .ResponsesStreamResponse != nil {
744+ // Build the complete response from the final chunk
745+ data := & AccumulatedData {
746+ RequestID : requestID ,
747+ Status : "success" ,
748+ Stream : true ,
749+ StartTimestamp : startTimestamp ,
750+ EndTimestamp : endTimestamp ,
751+ Latency : result .GetExtraFields ().Latency ,
752+ ErrorDetails : bifrostErr ,
753+ RawResponse : accumulatedData .RawResponse ,
754+ }
706755
707- // Extract the complete response from the stream response
708- if result .ResponsesStreamResponse .Response != nil {
709- data .OutputMessages = result .ResponsesStreamResponse .Response .Output
710- if result .ResponsesStreamResponse .Response .Usage != nil {
711- // Convert ResponsesResponseUsage to schemas.LLMUsage
712- data .TokenUsage = & schemas.BifrostLLMUsage {
713- PromptTokens : result .ResponsesStreamResponse .Response .Usage .InputTokens ,
714- CompletionTokens : result .ResponsesStreamResponse .Response .Usage .OutputTokens ,
715- TotalTokens : result .ResponsesStreamResponse .Response .Usage .TotalTokens ,
756+ if bifrostErr != nil {
757+ data .Status = "error"
758+ }
759+
760+ // Extract the complete response from the stream response
761+ if result .ResponsesStreamResponse .Response != nil {
762+ data .OutputMessages = result .ResponsesStreamResponse .Response .Output
763+ if result .ResponsesStreamResponse .Response .Usage != nil {
764+ // Convert ResponsesResponseUsage to schemas.LLMUsage
765+ data .TokenUsage = & schemas.BifrostLLMUsage {
766+ PromptTokens : result .ResponsesStreamResponse .Response .Usage .InputTokens ,
767+ CompletionTokens : result .ResponsesStreamResponse .Response .Usage .OutputTokens ,
768+ TotalTokens : result .ResponsesStreamResponse .Response .Usage .TotalTokens ,
769+ }
716770 }
717771 }
718- }
719772
720- if a .pricingManager != nil {
721- cost := a .pricingManager .CalculateCostWithCacheDebug (result )
722- data .Cost = bifrost .Ptr (cost )
723- }
773+ if a .pricingManager != nil {
774+ cost := a .pricingManager .CalculateCostWithCacheDebug (result )
775+ data .Cost = bifrost .Ptr (cost )
776+ }
724777
725- return & ProcessedStreamResponse {
726- Type : StreamResponseTypeFinal ,
727- RequestID : requestID ,
728- StreamType : StreamTypeResponses ,
729- Provider : provider ,
730- Model : model ,
731- Data : data ,
732- }, nil
778+ return & ProcessedStreamResponse {
779+ Type : StreamResponseTypeFinal ,
780+ RequestID : requestID ,
781+ StreamType : StreamTypeResponses ,
782+ Provider : provider ,
783+ Model : model ,
784+ Data : data ,
785+ }, nil
786+ } else {
787+ return nil , nil
788+ }
733789 }
790+ return nil , nil
734791 }
735792
736793 // For non-final chunks from OpenAI, just pass through
@@ -753,6 +810,9 @@ func (a *Accumulator) processResponsesStreamingResponse(ctx *schemas.BifrostCont
753810 if bifrostErr != nil {
754811 chunk .FinishReason = bifrost .Ptr ("error" )
755812 } else if result != nil && result .ResponsesStreamResponse != nil {
813+ if result .ResponsesStreamResponse .ExtraFields .RawResponse != nil {
814+ chunk .RawResponse = bifrost .Ptr (fmt .Sprintf ("%v" , result .ResponsesStreamResponse .ExtraFields .RawResponse ))
815+ }
756816 // Store a deep copy of the stream response to prevent shared data mutation between plugins
757817 chunk .StreamResponse = deepCopyResponsesStreamResponse (result .ResponsesStreamResponse )
758818 // Extract token usage from stream response if available
0 commit comments