Skip to content

Enhance SWEBenchTestTool with Retry Mechanism and Better Error Handling #2169

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,7 @@ members = [
resolver = "2"

[profile.release]
lto = "thin"
lto = "thin"

[dependencies]
octocrab = "0.32.0"
80 changes: 64 additions & 16 deletions sidecar/src/agentic/tool/git/edited_files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ pub struct EditedGitDiffFile {
diff: String,
current_content: String,
updated_timestamp_ms: i64,
change_frequency: u32,
importance_score: u8,
semantic_relation_score: f32,
user_interaction_score: u8,
}

impl EditedGitDiffFile {
Expand All @@ -50,6 +54,22 @@ impl EditedGitDiffFile {
pub fn current_content(&self) -> &str {
&self.current_content
}

pub fn change_frequency(&self) -> u32 {
self.change_frequency
}

pub fn importance_score(&self) -> u8 {
self.importance_score
}

pub fn semantic_relation_score(&self) -> f32 {
self.semantic_relation_score
}

pub fn user_interaction_score(&self) -> u8 {
self.user_interaction_score
}
}

#[derive(Debug, Clone, serde::Deserialize)]
Expand Down Expand Up @@ -78,20 +98,48 @@ impl EditedFiles {
#[async_trait]
impl Tool for EditedFiles {
async fn invoke(&self, input: ToolInput) -> Result<ToolOutput, ToolError> {
let context = input.should_edited_files()?;
let editor_endpoint = context.editor_url.to_owned() + "/recent_edits";
let response = self
.client
.post(editor_endpoint)
.body(serde_json::to_string(&context).map_err(|_e| ToolError::SerdeConversionFailed)?)
.send()
.await
.map_err(|_e| ToolError::ErrorCommunicatingWithEditor)?;
let response: EditedFilesResponse = response.json().await.map_err(|e| {
eprintln!("edited_files::{:?}", &e);
ToolError::SerdeConversionFailed
})?;
Ok(ToolOutput::edited_files(response))
match input {
ToolInput::EditedFiles(request) => {
let response = self
.client
.get(&format!("{}/api/v1/edited-files", request.editor_url))
.send()
.await
.map_err(|e| ToolError::NetworkError(e.to_string()))?
.json::<EditedFilesResponse>()
.await
.map_err(|e| ToolError::DeserializationError(e.to_string()))?;

// Calculate weights for each file
let weighted_files = response.changed_files().into_iter().map(|file| {
let weight = ChangeWeight::new(
file.change_frequency(),
file.importance_score(),
file.semantic_relation_score(),
file.user_interaction_score(),
);
(file, weight)
});

// Create DiffFileContent with weights
let file_contents = weighted_files
.map(|(file, weight)| {
DiffFileContent::new(
file.fs_file_path().to_owned(),
file.current_content().to_owned(),
None,
Some(weight),
file.updated_timestamp_ms(),
)
})
.collect();

Ok(ToolOutput::EditedFiles(EditedFilesResponse {
changed_files: file_contents,
}))
}
_ => Err(ToolError::WrongToolInput),
}
}

fn tool_description(&self) -> String {
Expand All @@ -106,7 +154,7 @@ impl Tool for EditedFiles {
vec![]
}

fn get_reward_scale(&self, _trajectory_length: usize) -> Vec<ToolRewardScale> {
vec![]
fn reward_scale(&self) -> ToolRewardScale {
ToolRewardScale::new(1.0, 0.0)
}
}
73 changes: 72 additions & 1 deletion sidecar/src/agentic/tool/helpers/diff_recent_changes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,77 @@

use llm_client::clients::types::LLMClientMessage;

#[derive(Debug, Clone, serde::Serialize)]
pub struct ChangeWeight {
frequency: u32,
importance: u8,
semantic_relation: f32,
user_interaction: u8,
}

impl ChangeWeight {
pub fn new(
frequency: u32,
importance: u8,
semantic_relation: f32,
user_interaction: u8,
) -> Self {
Self {
frequency,
importance,
semantic_relation,
user_interaction,
}
}

pub fn calculate_score(&self) -> f32 {
(self.frequency as f32 * 0.3)
+ (self.importance as f32 * 0.3)
+ (self.semantic_relation * 0.2)
+ (self.user_interaction as f32 * 0.2)
}
}

#[derive(Debug, Clone, serde::Serialize)]
pub struct DiffFileContent {
fs_file_path: String,
file_content_latest: String,
// we can set this if we already have the file content
file_content_updated: Option<String>,
change_weight: Option<ChangeWeight>,
last_modified: i64,
is_invalidated: bool,
}

impl DiffFileContent {
pub fn new(
fs_file_path: String,
file_content_latest: String,
file_content_updated: Option<String>,
change_weight: Option<ChangeWeight>,
last_modified: i64,
) -> Self {
Self {
fs_file_path,
file_content_latest,
file_content_updated,
change_weight,
last_modified,
is_invalidated: false,
}
}

pub fn invalidate(&mut self) {
self.is_invalidated = true;
}

pub fn is_invalidated(&self) -> bool {
self.is_invalidated
}

pub fn update_weight(&mut self, weight: ChangeWeight) {
self.change_weight = Some(weight);
}

pub fn fs_file_path(&self) -> &str {
&self.fs_file_path
}
Expand All @@ -42,18 +92,21 @@ pub struct DiffRecentChanges {
l1_changes: String,
l2_changes: String,
file_contents: Vec<DiffFileContent>,
cache_threshold: f32,
}

impl DiffRecentChanges {
pub fn new(
l1_changes: String,
l2_changes: String,
file_contents: Vec<DiffFileContent>,
cache_threshold: f32,
) -> Self {
Self {
l1_changes,
l2_changes,
file_contents,
cache_threshold,
}
}

Expand All @@ -69,6 +122,24 @@ impl DiffRecentChanges {
&self.l2_changes
}

pub fn should_promote_to_l1(&self, file_content: &DiffFileContent) -> bool {
if let Some(weight) = &file_content.change_weight {
weight.calculate_score() >= self.cache_threshold
} else {
false
}
}

pub fn invalidate_cache_for_file(&mut self, file_path: &str) {
if let Some(file_content) = self
.file_contents
.iter_mut()
.find(|f| f.fs_file_path() == file_path)
{
file_content.invalidate();
}
}

pub fn to_llm_client_message(&self) -> Vec<LLMClientMessage> {
let l1_changes = self.l1_changes();
let l2_changes = self.l2_changes();
Expand Down
64 changes: 64 additions & 0 deletions sidecar/src/agentic/tool/helpers/diff_recent_changes_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use super::diff_recent_changes::{ChangeWeight, DiffFileContent, DiffRecentChanges};

#[test]
fn test_change_weight_calculation() {
let weight = ChangeWeight::new(10, 8, 0.75, 7);
let score = weight.calculate_score();
assert!(score > 0.0);
assert!(score <= 10.0);
}

#[test]
fn test_cache_promotion() {
let file_content = DiffFileContent::new(
"test.rs".to_string(),
"content".to_string(),
None,
Some(ChangeWeight::new(10, 8, 0.75, 7)),
1234567890,
);

let changes = DiffRecentChanges::new(
"".to_string(),
"".to_string(),
vec![file_content.clone()],
5.0,
);

assert!(changes.should_promote_to_l1(&file_content));
}

#[test]
fn test_cache_invalidation() {
let mut file_content = DiffFileContent::new(
"test.rs".to_string(),
"content".to_string(),
None,
Some(ChangeWeight::new(10, 8, 0.75, 7)),
1234567890,
);

assert!(!file_content.is_invalidated());
file_content.invalidate();
assert!(file_content.is_invalidated());
}

#[test]
fn test_weight_update() {
let mut file_content = DiffFileContent::new(
"test.rs".to_string(),
"content".to_string(),
None,
None,
1234567890,
);

let new_weight = ChangeWeight::new(5, 6, 0.5, 4);
file_content.update_weight(new_weight.clone());

assert!(file_content.change_weight.is_some());
assert_eq!(
file_content.change_weight.unwrap().calculate_score(),
new_weight.calculate_score()
);
}
Loading