Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions TelegramSearchBot.Common/Model/AI/LlmContinuationSnapshot.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ public class SerializedChatMessage {
/// The text content of the message
/// </summary>
public string Content { get; set; } = null!;

/// <summary>
/// The reasoning content for thinking mode models (e.g., Kimi-thinking-preview, QwQ).
/// This field must be passed back to the API in subsequent requests to avoid HTTP 400 errors.
/// </summary>
public string? ReasoningContent { get; set; }
}

/// <summary>
Expand Down
96 changes: 93 additions & 3 deletions TelegramSearchBot.LLM/Service/AI/LLM/OpenAIService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,7 @@
if (cancellationToken.IsCancellationRequested) throw new TaskCanceledException();

var contentBuilder = new StringBuilder();
var reasoningContentBuilder = new StringBuilder();
var toolCallAccumulators = new Dictionary<int, ToolCallAccumulator>();
ChatFinishReason? finishReason = null;

Expand All @@ -985,6 +986,12 @@
}
}

// Accumulate reasoning content for thinking mode models (e.g., Kimi-thinking-preview)
var reasoningUpdate = GetStreamingReasoningContent(update);
if (!string.IsNullOrEmpty(reasoningUpdate)) {
reasoningContentBuilder.Append(reasoningUpdate);
}

// Accumulate tool call updates
foreach (var toolCallUpdate in update.ToolCallUpdates ?? Enumerable.Empty<StreamingChatToolCallUpdate>()) {
int index = toolCallUpdate.Index;
Expand All @@ -1006,6 +1013,7 @@
}

string responseText = contentBuilder.ToString().Trim();
// Do NOT trim reasoning content: thinking-mode providers (Kimi-thinking-preview, QwQ)
// require reasoning_content to be echoed back in subsequent requests exactly as
// streamed, including leading/trailing whitespace ("keep it as-is" per Moonshot docs).
string reasoningContent = reasoningContentBuilder.ToString();
Comment on lines 1015 to +1016
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🌐 Web query:

Does Kimi-thinking-preview or QwQ require the reasoning_content echoed back in subsequent chat.completions requests to match exactly (including leading/trailing whitespace) the streamed value, or is whitespace-trimmed content accepted?

💡 Result:

Kimi-thinking-preview (likely referring to Moonshot AI's Kimi K2 Thinking models like kimi-k2-thinking or kimi-k2.6 with thinking enabled) and QwQ (likely QwQ-32B-Preview, a reasoning model comparable to Kimi K2 Thinking) require the reasoning_content echoed back in subsequent chat.completions requests to match exactly the streamed value, including leading/trailing whitespace. Official Moonshot AI/Kimi docs explicitly state to "keep the reasoning_content from every historical assistant message in messages as-is. The simplest way is to append the assistant message returned from the previous API call directly back into messages." No sources indicate whitespace trimming is accepted; "as-is" implies exact preservation is necessary for proper preserved thinking and chain-of-thought continuity in multi-turn conversations. This applies to both official API and compatible implementations like vLLM, where field handling bugs highlight the need for precise input matching.

Citations:


Remove Trim() on reasoningContent to preserve provider compatibility.

Kimi-thinking-preview and QwQ require the reasoning_content field to be echoed back in subsequent chat.completions requests with exact preservation, including leading/trailing whitespace. Official Moonshot AI documentation explicitly states to "keep the reasoning_content from every historical assistant message in messages as-is." The current Trim() operation violates this requirement and may cause validation or continuity issues in multi-turn conversations.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@TelegramSearchBot.LLM/Service/AI/LLM/OpenAIService.cs` around lines 1015 -
1016, The code trims reasoningContent which breaks provider compatibility;
change the assignment that uses reasoningContentBuilder.ToString().Trim() so it
uses the raw string (reasoningContentBuilder.ToString()) without calling Trim(),
leaving responseText (responseText = contentBuilder.ToString().Trim())
unchanged; ensure the variable reasoningContent (and any downstream use in
OpenAIService.cs / methods that send chat.completions) preserves
leading/trailing whitespace exactly as produced by reasoningContentBuilder.


// Check if this is a tool call response
if (finishReason == ChatFinishReason.ToolCalls && toolCallAccumulators.Any()) {
Expand All @@ -1025,6 +1033,10 @@
if (!string.IsNullOrWhiteSpace(responseText)) {
assistantMessage = new AssistantChatMessage(chatToolCalls) { Content = { ChatMessageContentPart.CreateTextPart(responseText) } };
}
// Set reasoning content for thinking mode models
if (!string.IsNullOrEmpty(reasoningContent)) {
SetAssistantReasoningContent(assistantMessage, reasoningContent);
}
providerHistory.Add(assistantMessage);

var toolIndicators = new StringBuilder();
Expand Down Expand Up @@ -1066,7 +1078,11 @@
} else {
// Not a tool call - regular text response
if (!string.IsNullOrWhiteSpace(responseText)) {
providerHistory.Add(new AssistantChatMessage(responseText));
var assistantMsg = new AssistantChatMessage(responseText);
if (!string.IsNullOrEmpty(reasoningContent)) {
SetAssistantReasoningContent(assistantMsg, reasoningContent);
}
providerHistory.Add(assistantMsg);
}
yield break;
}
Expand Down Expand Up @@ -1328,13 +1344,17 @@
foreach (var msg in history) {
string role;
string content = "";
string? reasoningContent = null;

if (msg is SystemChatMessage systemMsg) {
role = "system";
content = string.Join("", systemMsg.Content?.Select(p => p.Text) ?? Enumerable.Empty<string>());
} else if (msg is AssistantChatMessage assistantMsg) {
role = "assistant";
content = string.Join("", assistantMsg.Content?.Select(p => p.Text) ?? Enumerable.Empty<string>());
// Try to get reasoning content from the assistant message
// OpenAI SDK stores reasoning content in a separate property
reasoningContent = GetAssistantReasoningContent(assistantMsg);
} else if (msg is UserChatMessage userMsg) {
role = "user";
content = string.Join("", userMsg.Content?.Select(p => p.Text) ?? Enumerable.Empty<string>());
Expand All @@ -1343,11 +1363,61 @@
content = msg.ToString();
}

result.Add(new SerializedChatMessage { Role = role, Content = content });
result.Add(new SerializedChatMessage { Role = role, Content = content, ReasoningContent = reasoningContent });
}
return result;
}

/// <summary>
/// Extract reasoning_content from an AssistantChatMessage if available.
/// For thinking-mode models (e.g., Kimi-thinking-preview, QwQ) the reasoning process
/// is returned in a separate field that must be round-tripped on subsequent requests.
/// Uses reflection because the OpenAI SDK does not expose a public accessor.
/// </summary>
/// <param name="assistantMsg">The assistant message to inspect.</param>
/// <returns>The reasoning content, or null if none is present or reflection fails.</returns>
#nullable enable
private static string? GetAssistantReasoningContent(AssistantChatMessage assistantMsg) {
    try {
        // Some SDK versions surface reasoning via a "Reasoning" property; probe for it.
        var reasoningProp = assistantMsg.GetType().GetProperty("Reasoning");
        if (reasoningProp != null) {
            var value = reasoningProp.GetValue(assistantMsg);
            if (value is string reasoning && !string.IsNullOrEmpty(reasoning)) {
                return reasoning;
            }
        }
    } catch (System.Exception ex) {
        // Log once at Debug level so a reflection failure is observable instead of
        // the feature silently appearing not to work; guard avoids flooding logs.
        if (System.Threading.Interlocked.Exchange(ref _getReasoningWarned, 1) == 0) {
            System.Diagnostics.Debug.WriteLine($"GetAssistantReasoningContent reflection failed: {ex}");
        }
    }
    return null;
}
#nullable restore

// One-time warning flag for GetAssistantReasoningContent (0 = not yet warned).
private static int _getReasoningWarned;

/// <summary>
/// Extract reasoning_content from a streaming update for thinking-mode models
/// (e.g., Kimi-thinking-preview, QwQ). Uses reflection to access SDK internals
/// since no public property is guaranteed across SDK versions.
/// </summary>
/// <param name="update">The streaming chunk to inspect.</param>
/// <returns>The reasoning delta for this chunk, or null if none is present.</returns>
#nullable enable
private static string? GetStreamingReasoningContent(StreamingChatCompletionUpdate update) {
    try {
        // Preferred: ReasoningContentUpdate (OpenAI SDK naming for thinking models).
        var reasoningProp = update.GetType().GetProperty("ReasoningContentUpdate");
        if (reasoningProp != null) {
            var value = reasoningProp.GetValue(update);
            if (value is string reasoning && !string.IsNullOrEmpty(reasoning)) {
                return reasoning;
            }
        }
        // Fallback: a plain "Reasoning" property on older/alternative SDK builds.
        var fallbackProp = update.GetType().GetProperty("Reasoning");
        if (fallbackProp != null) {
            var value = fallbackProp.GetValue(update);
            if (value is string fallback && !string.IsNullOrEmpty(fallback)) {
                return fallback;
            }
        }
    } catch (System.Exception ex) {
        // Log once at Debug level instead of silently swallowing, so SDK property
        // renames or access failures are diagnosable in production.
        if (System.Threading.Interlocked.Exchange(ref _getStreamingReasoningWarned, 1) == 0) {
            System.Diagnostics.Debug.WriteLine($"GetStreamingReasoningContent reflection failed: {ex}");
        }
    }
    return null;
}
#nullable restore

// One-time warning flag for GetStreamingReasoningContent (0 = not yet warned).
private static int _getStreamingReasoningWarned;

/// <summary>
/// Deserialize portable format back to OpenAI ChatMessage list.
/// </summary>
Expand All @@ -1361,7 +1431,12 @@
result.Add(new SystemChatMessage(msg.Content ?? ""));
break;
case "assistant":
result.Add(new AssistantChatMessage(msg.Content ?? ""));
var assistantMsg = new AssistantChatMessage(msg.Content ?? "");
// Set reasoning content if available (for thinking mode models)
if (!string.IsNullOrEmpty(msg.ReasoningContent)) {
SetAssistantReasoningContent(assistantMsg, msg.ReasoningContent);
}
result.Add(assistantMsg);
break;
case "user":
default:
Expand All @@ -1372,6 +1447,21 @@
return result;
}

/// <summary>
/// Set reasoning_content on an AssistantChatMessage for thinking-mode models.
/// Uses reflection since the OpenAI SDK doesn't have a public setter; if the
/// "Reasoning" property is absent or read-only this is a no-op, but that fact
/// is logged once at Debug level so the round-trip gap is observable.
/// </summary>
/// <param name="msg">The assistant message to annotate.</param>
/// <param name="reasoningContent">The reasoning text to attach, passed through as-is.</param>
private static void SetAssistantReasoningContent(AssistantChatMessage msg, string reasoningContent) {
    try {
        var prop = msg.GetType().GetProperty("Reasoning");
        if (prop != null && prop.CanWrite) {
            prop.SetValue(msg, reasoningContent);
        } else if (System.Threading.Interlocked.Exchange(ref _setReasoningWarned, 1) == 0) {
            // Surface the silent no-op case: without a writable property the
            // reasoning_content round-trip does nothing and providers like
            // Kimi-thinking-preview may reject follow-up requests.
            System.Diagnostics.Debug.WriteLine(
                "AssistantChatMessage has no writable 'Reasoning' property; reasoning_content round-trip is a no-op.");
        }
    } catch (System.Exception ex) {
        if (System.Threading.Interlocked.Exchange(ref _setReasoningWarned, 1) == 0) {
            System.Diagnostics.Debug.WriteLine($"SetAssistantReasoningContent reflection failed: {ex}");
        }
    }
}

// One-time warning flag for SetAssistantReasoningContent (0 = not yet warned).
private static int _setReasoningWarned;

public async Task<float[]> GenerateEmbeddingsAsync(string text, string modelName, LLMChannel channel) {


Expand Down
Loading