diff --git a/.gitignore b/.gitignore index 5220459..ec10849 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ __pycache__ tmp* .env +.envrc .venv .request_cache_*.pkl diff --git a/config/gesserit-config.json b/config/gesserit-config.json index c25a18a..15b5ded 100644 --- a/config/gesserit-config.json +++ b/config/gesserit-config.json @@ -3,6 +3,7 @@ "llvm": "", "cuda": "", "kokkos": "", + "ninja": "", "sm": "86", "exec_check": "ncu", "exec_check_fail_text": "==WARNING== No kernels were profiled." diff --git a/config/perlmutter-config.json b/config/perlmutter-config.json index d2d6dc5..e3bf83f 100644 --- a/config/perlmutter-config.json +++ b/config/perlmutter-config.json @@ -3,6 +3,7 @@ "llvm": "module load PrgEnv-llvm", "cuda": "module load cudatoolkit", "kokkos": "module load kokkos-gpu", + "ninja": "", "sm": "80", "exec_check": "ncu", "exec_check_fail_text": "==WARNING== No kernels were profiled." diff --git a/config/zaratan-config.json b/config/zaratan-config.json index cd4affe..1d3900a 100644 --- a/config/zaratan-config.json +++ b/config/zaratan-config.json @@ -3,6 +3,7 @@ "llvm": "spack load llvm && export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/scratch/zt1/project/bhatele-lab/user/jhdavis/spack-install/linux-rhel8-zen2/gcc-11.3.0/llvm-19.1.7-owz26zzxphj6x4xfgxsyxgfhvktvs4kd/lib/x86_64-unknown-linux-gnu/", "cuda": "module load cuda/gcc/11.3.0/zen2/12.3.0", "kokkos": "spack load kokkos", + "ninja": "spack load ninja", "sm": "80", "exec_check": "~/llms4hpc/code-translation/src/drivers/exec-check.sh", "exec_check_fail_text": "No GPU kernels launched!!" 
diff --git a/src/translate/swe_agent/swe_agent_translator.py b/src/translate/swe_agent/swe_agent_translator.py index 9cb1839..0acb49a 100644 --- a/src/translate/swe_agent/swe_agent_translator.py +++ b/src/translate/swe_agent/swe_agent_translator.py @@ -5,7 +5,10 @@ import shutil import subprocess import json -from typing import List, Optional, Dict, Any +import time +import atexit +from pathlib import Path +from typing import List, Optional, Dict, Any, Union # local imports from translator import Translator @@ -16,10 +19,13 @@ class SWEAgentTranslator(Translator): # Constants TEMP_REPO_PATH = "/tmp/temp_sweagent_repo" + CONTAINER_REPO_PATH = "/temp_sweagent_repo" TRANSLATION_TASK_FILENAME = "translation_task.md" TRAJECTORIES_DIR = "trajectories" PATCH_FILENAME = "temp.patch" EXPERIMENT_METADATA_FILENAME = "experiment_metadata.json" + SERVE_CHECK_COOLDOWN = 10 + _MAX_SERVE_CHECK_ATTEMPTS = 100 # File extensions to remove from output REMOVE_EXTENSIONS = (".cu", ".cuh") @@ -32,6 +38,10 @@ class SWEAgentTranslator(Translator): # Instance variables _swe_agent_model_name: str _swe_agent_per_instance_cost_limit: float + _swe_agent_config: Optional[List[str]] + _swe_agent_parser: Optional[str] + _swe_agent_max_input_token: Optional[int] + _temp_repo_path: str _translation_task_path: str _output_path: str @@ -47,7 +57,12 @@ def __init__( dry: bool = False, hide_progress: bool = False, swe_agent_model_name: Optional[str] = None, - swe_agent_per_instance_cost_limit: float = 0.06 + swe_agent_per_instance_cost_limit: float = 0.06, + swe_agent_config: Optional[Union[str, List[str]]] = None, + swe_agent_parser: Optional[str] = None, + swe_agent_max_input_token: Optional[int] = None, + vllm_environment: Optional[str] = None, + vllm_yaml_config: Optional[str] = None, ) -> None: super().__init__( input_repo, @@ -62,31 +77,101 @@ def __init__( self._swe_agent_model_name = swe_agent_model_name self._swe_agent_per_instance_cost_limit = swe_agent_per_instance_cost_limit + 
def _launch_ollama_server(self) -> None:
    """Launch an Ollama server in the background and wait until it answers.

    Uses ``ollama list`` as the readiness probe. The wait is bounded by
    ``_MAX_SERVE_CHECK_ATTEMPTS`` (mirroring the vLLM launcher) so a server
    that crashes on startup cannot hang the translator forever.

    Raises:
        ValueError: If the ``ollama`` executable is not on the PATH.
        RuntimeError: If the server does not become ready in time.
    """
    # Check that ollama is installed.
    if not shutil.which("ollama"):
        raise ValueError("Ollama is not in the path. Please install Ollama and add it to the path.")
    # Early exit if an ollama server is already running.
    if subprocess.run(["ollama", "list"], capture_output=True, text=True).returncode == 0:
        return
    # start_new_session detaches the server from our process group so it
    # survives signals sent to the translator.
    subprocess.Popen(["ollama", "serve"],
                     stdout=subprocess.DEVNULL,
                     stderr=subprocess.STDOUT,
                     stdin=subprocess.DEVNULL,
                     start_new_session=True)
    # Poll until the server is reachable, with a bounded number of attempts
    # (the original loop here was unbounded and could spin forever).
    for _ in range(self._MAX_SERVE_CHECK_ATTEMPTS):
        status = subprocess.run(["ollama", "list"], capture_output=True, text=True)
        if status.returncode == 0:
            print("Ollama server ready.")
            return
        print(f"Ollama server not ready, checking again after {self.SERVE_CHECK_COOLDOWN} seconds...")
        time.sleep(self.SERVE_CHECK_COOLDOWN)
    raise RuntimeError("Ollama server did not become ready in time.")
Please install Ollama and add it to the path.") + # Early exit if ollama is already running + if subprocess.run(["ollama", "list"], capture_output=True, text=True).returncode == 0: + return + ollama_command = ["ollama", "serve"] + subprocess.Popen(ollama_command, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + stdin=subprocess.DEVNULL, + start_new_session=True) + # Check that the server is running + checking = True + while checking: + status = subprocess.run(["ollama", "list"], capture_output=True, text=True) + if status.returncode == 0: + checking = False + else: + print(f"Ollama server not ready, checking again after {self.SERVE_CHECK_COOLDOWN} seconds...") + time.sleep(self.SERVE_CHECK_COOLDOWN) + print(f"Ollama server ready.") + return + @staticmethod def add_args(parser: Any) -> None: """Add command line arguments for SWE-agent configuration.""" parser.add_argument("--swe-agent-model-name", type=str, - help="Name of the agent model to use (e.g. 'gpt-4o').") + help="Name of the agent model to use (e.g. 'gpt-4o', 'ollama/llama3.2:latest').") parser.add_argument("--swe-agent-per-instance-cost-limit", type=float, - help="Per-instance cost limit for the agent model.") - + help="Per-instance cost limit for the agent model; set to 0 for local models.") + parser.add_argument("--swe-agent-config", action="append", + help="May be specified multiple times; default config file is used if none is provided.") + parser.add_argument("--swe-agent-parser", type=str, choices=["thought_action", "function_calling"], + help="Parsing strategy. Use 'thought_action' for local/Ollama models.") + parser.add_argument("--swe-agent-max-input-token", type=int, + help="Override max input tokens to avoid local-model warnings.") + parser.add_argument("--vllm-environment", type=str, + help="Path to the Python environment that has vLLM installed (e.g. 
~/pssg-venv).") + parser.add_argument("--vllm-yaml-config", type=str, + help="Path to vLLM YAML config file to pass via --config.") @staticmethod def parse_args(args: Any) -> Dict[str, Any]: """Parse command line arguments for SWE-agent configuration.""" return { "swe_agent_model_name": args.swe_agent_model_name, - "swe_agent_per_instance_cost_limit": args.swe_agent_per_instance_cost_limit + "swe_agent_per_instance_cost_limit": args.swe_agent_per_instance_cost_limit, + "swe_agent_config": args.swe_agent_config, + "swe_agent_parser": args.swe_agent_parser, + "swe_agent_max_input_token": args.swe_agent_max_input_token, + "vllm_environment": args.vllm_environment, + "vllm_yaml_config": args.vllm_yaml_config, } - def translate(self) -> None: """Execute the complete translation process using SWE-agent. @@ -108,6 +193,7 @@ def _execute_translation_workflow(self) -> None: self.initialize_temp_repo() if self.run_swe_agent(): + self._fix_makefile_tabs_and_duplicates() print("Saving translated output...") self.save_output(self._output_path) self.remove_unnecessary_output_files() @@ -138,12 +224,13 @@ def _create_translation_task_content(self) -> str: f"You are a helpful coding assistant. You are helping a software developer translate a " f"codebase from the {self._src_model} execution model to the {self._dst_model} execution " f"model.\n\n" - f"The codebase is called {data['app']}. Its path is {data['path']}. Given this code " + f"The codebase is called {data['app']}. Its path is {self.CONTAINER_REPO_PATH}. Given this code " f"repository, translate the {data['app']} codebase's {self._src_model}-specific files to " f"the {self._dst_model} execution model.\n\n" f"The new files should be in {data['filename_desc']} and all old {self._src_model} files " - f"must be deleted. A new {data['build_filename']} should be made to compile accordingly " - f"with the new files.\n\n" + f"must be deleted. 
You may use standard command-line tools (e.g., the `rm` command) to " + f"remove obsolete {self._src_model}-specific files. A new {data['build_filename']} should " + f"be made to compile accordingly with the new files.\n\n" f"Ensure that the user can compile this code using, for example, `{data['ex_build_cmd']}` " f"to build the code for {data['ex_build_desc']}. Ensure also that the command line " f"interface after translation still works as expected, so that, for example, " @@ -175,7 +262,6 @@ def _initialize_git_repo(self) -> None: subprocess.run(self.GIT_ADD_ALL, cwd=self._temp_repo_path, check=True) subprocess.run(self.GIT_COMMIT_INITIAL, cwd=self._temp_repo_path, check=True) - def run_swe_agent(self) -> bool: """Run the SWE-agent command and apply the resulting patch.""" command = self._build_swe_agent_command() @@ -197,15 +283,26 @@ def run_swe_agent(self) -> bool: def _build_swe_agent_command(self) -> List[str]: """Build the SWE-agent command with all required parameters.""" - return [ + cmd = [ "sweagent", "run", - f"--agent.model.name={self._swe_agent_model_name}", - f"--agent.model.per_instance_cost_limit={self._swe_agent_per_instance_cost_limit}", f"--env.repo.path={self._temp_repo_path}", - "--env.deployment.image=python", f"--problem_statement.path={self._translation_task_path}", ] + if self._swe_agent_model_name: + cmd.append(f"--agent.model.name={self._swe_agent_model_name}") + if self._swe_agent_per_instance_cost_limit: + cmd.append(f"--agent.model.per_instance_cost_limit={self._swe_agent_per_instance_cost_limit}") + if self._swe_agent_parser: + cmd.append(f"--agent.tools.parse_function.type={self._swe_agent_parser}") + if self._swe_agent_max_input_token: + cmd.append(f"--agent.model.max_input_tokens={self._swe_agent_max_input_token}") + if self._swe_agent_config: + for cfg in self._swe_agent_config: + cmd.extend(["--config", cfg]) + + return cmd + def _apply_swe_agent_patch(self) -> bool: """Find and apply the patch file generated by SWE-agent.""" 
print("Applying patch...") @@ -283,6 +380,45 @@ def _remove_files_by_extension(self, directory: str, extensions: tuple) -> None: file_path = os.path.join(root, file) os.remove(file_path) + def _fix_makefile_tabs_and_duplicates(self) -> None: + makefile = Path(self._temp_repo_path) / "Makefile" + if not makefile.exists(): + return + + lines = makefile.read_text(encoding="utf-8", errors="replace").splitlines(True) + + # 1) Remove exact duplicate lines (preserve order) + print("Removing duplicate lines in the Makefile...") + seen = set() + duplicates = [] + for line in lines: + if line not in seen: + seen.add(line) + duplicates.append(line) + lines = duplicates + + # 2) Enforce Makefile tab rules + print("Fixing Makefile tabs...") + i = 0 + while i < len(lines) - 1: + curr = lines[i].lstrip() + nxt = lines[i + 1] + + is_rule = ":" in curr + is_conditional = curr.startswith(( + "ifeq", + "ifneq", + "ifdef", + "ifndef", + "else" + )) + + if is_rule or is_conditional: + if nxt.strip() and not nxt.startswith("\t") and not nxt.lstrip().startswith("#"): + lines[i + 1] = "\t" + nxt + i += 1 + + makefile.write_text("".join(lines), encoding="utf-8") def write_experiment_metadata(self) -> None: """Write experiment metadata to a JSON file in the output directory.""" @@ -327,3 +463,44 @@ def cleanup_temp_repo(self) -> None: except OSError as e: print(f"Error cleaning up temporary repository: {e}") # Don't raise here as this is cleanup code + + def _launch_vllm_server(self, environment_path: str, yaml_config: Optional[str] = None): + """Launch a vLLM server in the background using the Python environment directory + provided. 
+ """ + # Early exit if vLLM server is already running + if subprocess.run(["curl", "http://127.0.0.1:8000/health"], capture_output=True, + text=True, check=False).returncode == 0: + return None + py_executable = os.path.join(environment_path, "bin", "python") + vllm_command = [ + py_executable, "-m", "vllm.entrypoints.openai.api_server", + "--tool-call-parser", "openai", + "--enable-auto-tool-choice", + "--reasoning-parser", "openai_gptoss", + "--host", "127.0.0.1", + "--port", "8000", + ] + vllm_api_key = os.getenv("VLLM_API_KEY") + if self._swe_agent_model_name is not None: + vllm_command.extend(["--model", self._swe_agent_model_name]) + if vllm_api_key is not None: + vllm_command.extend(["--api-key", vllm_api_key]) + if yaml_config: + vllm_command.extend(["--config", yaml_config]) + print("Full vLLM subprocess command:", " ".join(vllm_command)) + vllm_server = subprocess.Popen(vllm_command) + # Ping the server until it is ready at the health endpoint + checking, num_attempts = True, 0 + while checking and num_attempts < self._MAX_SERVE_CHECK_ATTEMPTS: + status = subprocess.run(["curl", "http://127.0.0.1:8000/health"], capture_output=True, + text=True, check=False) + if status.returncode == 0: + checking = False + else: + print(f"VLLM server not ready, checking again after {self.SERVE_CHECK_COOLDOWN} seconds...") + time.sleep(self.SERVE_CHECK_COOLDOWN) + num_attempts += 1 + atexit.register(vllm_server.terminate) + print("VLLM server ready.") + return vllm_server diff --git a/targets/XSBench/kokkos/target.json b/targets/XSBench/kokkos/target.json index bbade92..446358c 100644 --- a/targets/XSBench/kokkos/target.json +++ b/targets/XSBench/kokkos/target.json @@ -2,7 +2,7 @@ "app": "xsbench", "model": "kokkos", "path": "targets/XSBench/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . 
&& cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_timeout": 120, diff --git a/targets/llm.c/kokkos/target.json b/targets/llm.c/kokkos/target.json index f92feb8..f6ef533 100644 --- a/targets/llm.c/kokkos/target.json +++ b/targets/llm.c/kokkos/target.json @@ -2,7 +2,7 @@ "app": "llm.c", "model": "kokkos", "path": "targets/llm.c/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "setup_commands": ["cp $SCRATCH/llmc_inputs/*.bin ."], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", diff --git a/targets/microXOR/cuda/repo/translation_task.md b/targets/microXOR/cuda/repo/translation_task.md deleted file mode 100644 index ac9db36..0000000 --- a/targets/microXOR/cuda/repo/translation_task.md +++ /dev/null @@ -1,7 +0,0 @@ -You are a helpful coding assistant. You are helping a software developer translate a codebase from the cuda execution model to the openmp-offload execution model. - -The codebase is called microxor. Its path is targets/microXOR/openmp-offload/repo. Given this code repository, translate the microxor codebase's cuda-specific files to the openmp-offload execution model. - -The new files should be in C++ and all old cuda files must be deleted. A new Makefile should be made to compile accordingly with the new files. - -Ensure that the user can compile this code using, for example, `make SM_VERSION=sm_80 CXX_COMPILER=clang++` to build the code for a system with an NVIDIA GPU with compute capability 80 compiled with clang++. 
Ensure also that the command line interface after translation still works as expected, so that, for example, `./microXOR.exe 1024 32` still works to run the code with a 1024 by 1024 input matrix and a kernel with 32 times 32 threads per block. \ No newline at end of file diff --git a/targets/microXOR/kokkos/target.json b/targets/microXOR/kokkos/target.json index bdd37b2..f4f072a 100644 --- a/targets/microXOR/kokkos/target.json +++ b/targets/microXOR/kokkos/target.json @@ -2,7 +2,7 @@ "app": "microxor", "model": "kokkos", "path": "targets/microXOR/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_timeout": 120, diff --git a/targets/microXORh/kokkos/target.json b/targets/microXORh/kokkos/target.json index bc2f65d..d4fd087 100644 --- a/targets/microXORh/kokkos/target.json +++ b/targets/microXORh/kokkos/target.json @@ -2,7 +2,7 @@ "app": "microxorh", "model": "kokkos", "path": "targets/microXORh/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . 
&& cmake --build build/", "build_timeout": 120, diff --git a/targets/nanoXOR/kokkos/target.json b/targets/nanoXOR/kokkos/target.json index ab42118..62decf1 100644 --- a/targets/nanoXOR/kokkos/target.json +++ b/targets/nanoXOR/kokkos/target.json @@ -2,7 +2,7 @@ "app": "nanoxor", "model": "kokkos", "path": "targets/nanoXOR/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_timeout": 120,