fix: pass reasoning_content back in thinking mode to avoid HTTP 400 #324
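Context: thinking-mode models return their chain of thought in a separate reasoning_content field, and providers such as Moonshot's Kimi-thinking-preview may reject a follow-up chat.completions request with HTTP 400 if the replayed assistant turn drops that field. A rough illustration using the serialized-history shape this PR introduces (the conversation text is made up):

```csharp
var history = new List<SerializedChatMessage> {
    new SerializedChatMessage { Role = "user", Content = "How many r's are in strawberry?" },
    // The assistant turn keeps its reasoning_content so it can be echoed back on the
    // next request; omitting it is the HTTP 400 failure this PR addresses.
    new SerializedChatMessage {
        Role = "assistant",
        Content = "Three.",
        ReasoningContent = "Count the letters: s-t-r-a-w-b-e-r-r-y ..."
    },
    new SerializedChatMessage { Role = "user", Content = "Are you sure?" }
};
```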
Changes from all commits: TelegramSearchBot.LLM/Service/AI/LLM/OpenAIService.cs
@@ -967,6 +967,7 @@
     if (cancellationToken.IsCancellationRequested) throw new TaskCanceledException();

     var contentBuilder = new StringBuilder();
+    var reasoningContentBuilder = new StringBuilder();
     var toolCallAccumulators = new Dictionary<int, ToolCallAccumulator>();
     ChatFinishReason? finishReason = null;

@@ -985,6 +986,12 @@
         }
     }

+    // Accumulate reasoning content for thinking mode models (e.g., Kimi-thinking-preview)
+    var reasoningUpdate = GetStreamingReasoningContent(update);
+    if (!string.IsNullOrEmpty(reasoningUpdate)) {
+        reasoningContentBuilder.Append(reasoningUpdate);
+    }
+
     // Accumulate tool call updates
     foreach (var toolCallUpdate in update.ToolCallUpdates ?? Enumerable.Empty<StreamingChatToolCallUpdate>()) {
         int index = toolCallUpdate.Index;

@@ -1006,6 +1013,7 @@
     }

     string responseText = contentBuilder.ToString().Trim();
+    string reasoningContent = reasoningContentBuilder.ToString().Trim();

     // Check if this is a tool call response
     if (finishReason == ChatFinishReason.ToolCalls && toolCallAccumulators.Any()) {

@@ -1025,6 +1033,10 @@
     if (!string.IsNullOrWhiteSpace(responseText)) {
         assistantMessage = new AssistantChatMessage(chatToolCalls) { Content = { ChatMessageContentPart.CreateTextPart(responseText) } };
     }
+    // Set reasoning content for thinking mode models
+    if (!string.IsNullOrEmpty(reasoningContent)) {
+        SetAssistantReasoningContent(assistantMessage, reasoningContent);
+    }
     providerHistory.Add(assistantMessage);

     var toolIndicators = new StringBuilder();

@@ -1066,7 +1078,11 @@
     } else {
         // Not a tool call - regular text response
         if (!string.IsNullOrWhiteSpace(responseText)) {
-            providerHistory.Add(new AssistantChatMessage(responseText));
+            var assistantMsg = new AssistantChatMessage(responseText);
+            if (!string.IsNullOrEmpty(reasoningContent)) {
+                SetAssistantReasoningContent(assistantMsg, reasoningContent);
+            }
+            providerHistory.Add(assistantMsg);
         }
         yield break;
     }

@@ -1328,13 +1344,17 @@
     foreach (var msg in history) {
         string role;
         string content = "";
+        string? reasoningContent = null;

         if (msg is SystemChatMessage systemMsg) {
             role = "system";
             content = string.Join("", systemMsg.Content?.Select(p => p.Text) ?? Enumerable.Empty<string>());
         } else if (msg is AssistantChatMessage assistantMsg) {
             role = "assistant";
             content = string.Join("", assistantMsg.Content?.Select(p => p.Text) ?? Enumerable.Empty<string>());
+            // Try to get reasoning content from the assistant message
+            // OpenAI SDK stores reasoning content in a separate property
+            reasoningContent = GetAssistantReasoningContent(assistantMsg);
         } else if (msg is UserChatMessage userMsg) {
             role = "user";
             content = string.Join("", userMsg.Content?.Select(p => p.Text) ?? Enumerable.Empty<string>());

@@ -1343,11 +1363,61 @@
             content = msg.ToString();
         }

-        result.Add(new SerializedChatMessage { Role = role, Content = content });
+        result.Add(new SerializedChatMessage { Role = role, Content = content, ReasoningContent = reasoningContent });
     }
     return result;
 }

+/// <summary>
+/// Extract reasoning_content from AssistantChatMessage if available.
+/// For thinking mode models, the reasoning process is returned separately.
+/// </summary>
+private static string? GetAssistantReasoningContent(AssistantChatMessage assistantMsg) {
+    // Try to access reasoning content - OpenAI Chat SDK may store it in various ways
+    // The reasoning_content is typically available via reflection or specific properties
+    try {
+        // Check for Reasoning property via reflection
+        var reasoningProp = assistantMsg.GetType().GetProperty("Reasoning");
+        if (reasoningProp != null) {
+            var value = reasoningProp.GetValue(assistantMsg);
+            if (value is string reasoning && !string.IsNullOrEmpty(reasoning)) {
+                return reasoning;
+            }
+        }
+    } catch {
+        // Reflection failed, return null
+    }
+    return null;
+}

Review comment on lines +1387 to +1390:

Don't silently swallow reflection failures — at least log at Debug. All three reflection helpers swallow every exception with an empty catch block.
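A minimal sketch of that pattern for one of the helpers; the helper name and the System.Diagnostics.Debug target are illustrative assumptions, and an injected logger would be preferable if this class has one:

```csharp
// Sketch: same reflection probe as GetAssistantReasoningContent, but the failure
// is surfaced at Debug level instead of being silently swallowed.
private static string? GetAssistantReasoningContentLogged(AssistantChatMessage assistantMsg) {
    try {
        var reasoningProp = assistantMsg.GetType().GetProperty("Reasoning");
        var value = reasoningProp?.GetValue(assistantMsg);
        if (value is string reasoning && !string.IsNullOrEmpty(reasoning)) {
            return reasoning;
        }
    } catch (Exception ex) {
        // Hypothetical logging target; swap in the project's logger if available.
        System.Diagnostics.Debug.WriteLine($"Reasoning reflection probe failed: {ex.Message}");
    }
    return null;
}
```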

+/// <summary>
+/// Extract reasoning_content from streaming update for thinking mode models.
+/// Uses reflection to access SDK internals.
+/// </summary>
+private static string? GetStreamingReasoningContent(StreamingChatCompletionUpdate update) {
+    try {
+        // Try ReasoningContentUpdate property (OpenAI SDK for thinking models)
+        var reasoningProp = update.GetType().GetProperty("ReasoningContentUpdate");
+        if (reasoningProp != null) {
+            var value = reasoningProp.GetValue(update);
+            if (value is string reasoning && !string.IsNullOrEmpty(reasoning)) {
+                return reasoning;
+            }
+        }
+        // Fallback: try Reasoning property
+        var fallbackProp = update.GetType().GetProperty("Reasoning");
+        if (fallbackProp != null) {
+            var value = fallbackProp.GetValue(update);
+            if (value is string fallback && !string.IsNullOrEmpty(fallback)) {
+                return fallback;
+            }
+        }
+    } catch {
+        // Reflection failed
+    }
+    return null;
+}

/// <summary>
/// Deserialize portable format back to OpenAI ChatMessage list.
/// </summary>

@@ -1361,7 +1431,12 @@
             result.Add(new SystemChatMessage(msg.Content ?? ""));
             break;
         case "assistant":
-            result.Add(new AssistantChatMessage(msg.Content ?? ""));
+            var assistantMsg = new AssistantChatMessage(msg.Content ?? "");
+            // Set reasoning content if available (for thinking mode models)
+            if (!string.IsNullOrEmpty(msg.ReasoningContent)) {
+                SetAssistantReasoningContent(assistantMsg, msg.ReasoningContent);
+            }
+            result.Add(assistantMsg);
             break;
         case "user":
         default:

@@ -1372,6 +1447,21 @@
     return result;
 }

+/// <summary>
+/// Set reasoning_content on AssistantChatMessage for thinking mode models.
+/// Uses reflection since OpenAI SDK doesn't have a public setter.
+/// </summary>
+private static void SetAssistantReasoningContent(AssistantChatMessage msg, string reasoningContent) {
+    try {
+        var prop = msg.GetType().GetProperty("Reasoning");
+        if (prop != null && prop.CanWrite) {
+            prop.SetValue(msg, reasoningContent);
+        }
+    } catch {
+        // Reflection failed, ignore
+    }
+}
+
 public async Task<float[]> GenerateEmbeddingsAsync(string text, string modelName, LLMChannel channel) {
🧩 Analysis chain

🌐 Web query: Does Kimi-thinking-preview or QwQ require the reasoning_content echoed back in subsequent chat.completions requests to match exactly (including leading/trailing whitespace) the streamed value, or is whitespace-trimmed content accepted?

💡 Result: Kimi-thinking-preview (likely referring to Moonshot AI's Kimi K2 Thinking models such as kimi-k2-thinking, or kimi-k2.6 with thinking enabled) and QwQ (likely QwQ-32B-Preview, a reasoning model comparable to Kimi K2 Thinking) require the reasoning_content echoed back in subsequent chat.completions requests to match the streamed value exactly, including leading and trailing whitespace. Official Moonshot AI/Kimi docs explicitly state to "keep the reasoning_content from every historical assistant message in messages as-is. The simplest way is to append the assistant message returned from the previous API call directly back into messages." No source indicates that whitespace trimming is accepted; "as-is" implies exact preservation is necessary for preserved thinking and chain-of-thought continuity in multi-turn conversations. This applies both to the official API and to compatible implementations such as vLLM, where field-handling bugs highlight the need for precise input matching.
Remove Trim() on reasoningContent to preserve provider compatibility.
Kimi-thinking-preview and QwQ require the reasoning_content field to be echoed back in subsequent chat.completions requests with exact preservation, including leading/trailing whitespace. Official Moonshot AI documentation explicitly states to "keep the reasoning_content from every historical assistant message in messages as-is." The current Trim() operation violates this requirement and may cause validation or continuity issues in multi-turn conversations.
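Concretely, that would mean leaving the accumulated reasoning text untrimmed in the streaming path shown above, along these lines (a sketch, not a committed change):

```csharp
string responseText = contentBuilder.ToString().Trim();
// Keep reasoning_content exactly as streamed; thinking-mode providers expect it echoed back as-is.
string reasoningContent = reasoningContentBuilder.ToString();
```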