diff --git a/.gitignore b/.gitignore index 5220459..ec10849 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ __pycache__ tmp* .env +.envrc .venv .request_cache_*.pkl diff --git a/config/gesserit-config.json b/config/gesserit-config.json index c25a18a..15b5ded 100644 --- a/config/gesserit-config.json +++ b/config/gesserit-config.json @@ -3,6 +3,7 @@ "llvm": "", "cuda": "", "kokkos": "", + "ninja": "", "sm": "86", "exec_check": "ncu", "exec_check_fail_text": "==WARNING== No kernels were profiled." diff --git a/config/perlmutter-config.json b/config/perlmutter-config.json index d2d6dc5..e3bf83f 100644 --- a/config/perlmutter-config.json +++ b/config/perlmutter-config.json @@ -3,6 +3,7 @@ "llvm": "module load PrgEnv-llvm", "cuda": "module load cudatoolkit", "kokkos": "module load kokkos-gpu", + "ninja": "", "sm": "80", "exec_check": "ncu", "exec_check_fail_text": "==WARNING== No kernels were profiled." diff --git a/config/zaratan-config.json b/config/zaratan-config.json index cd4affe..1d3900a 100644 --- a/config/zaratan-config.json +++ b/config/zaratan-config.json @@ -3,6 +3,7 @@ "llvm": "spack load llvm && export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/scratch/zt1/project/bhatele-lab/user/jhdavis/spack-install/linux-rhel8-zen2/gcc-11.3.0/llvm-19.1.7-owz26zzxphj6x4xfgxsyxgfhvktvs4kd/lib/x86_64-unknown-linux-gnu/", "cuda": "module load cuda/gcc/11.3.0/zen2/12.3.0", "kokkos": "spack load kokkos", + "ninja": "spack load ninja", "sm": "80", "exec_check": "~/llms4hpc/code-translation/src/drivers/exec-check.sh", "exec_check_fail_text": "No GPU kernels launched!!" 
diff --git a/src/translate/swe_agent/swe_agent_translator.py b/src/translate/swe_agent/swe_agent_translator.py index 9cb1839..0acb49a 100644 --- a/src/translate/swe_agent/swe_agent_translator.py +++ b/src/translate/swe_agent/swe_agent_translator.py @@ -5,7 +5,10 @@ import shutil import subprocess import json -from typing import List, Optional, Dict, Any +import time +import atexit +from pathlib import Path +from typing import List, Optional, Dict, Any, Union # local imports from translator import Translator @@ -16,10 +19,13 @@ class SWEAgentTranslator(Translator): # Constants TEMP_REPO_PATH = "/tmp/temp_sweagent_repo" + CONTAINER_REPO_PATH = "/temp_sweagent_repo" TRANSLATION_TASK_FILENAME = "translation_task.md" TRAJECTORIES_DIR = "trajectories" PATCH_FILENAME = "temp.patch" EXPERIMENT_METADATA_FILENAME = "experiment_metadata.json" + SERVE_CHECK_COOLDOWN = 10 + _MAX_SERVE_CHECK_ATTEMPTS = 100 # File extensions to remove from output REMOVE_EXTENSIONS = (".cu", ".cuh") @@ -32,6 +38,10 @@ class SWEAgentTranslator(Translator): # Instance variables _swe_agent_model_name: str _swe_agent_per_instance_cost_limit: float + _swe_agent_config: Optional[List[str]] + _swe_agent_parser: Optional[str] + _swe_agent_max_input_token: Optional[int] + _temp_repo_path: str _translation_task_path: str _output_path: str @@ -47,7 +57,12 @@ def __init__( dry: bool = False, hide_progress: bool = False, swe_agent_model_name: Optional[str] = None, - swe_agent_per_instance_cost_limit: float = 0.06 + swe_agent_per_instance_cost_limit: float = 0.06, + swe_agent_config: Optional[Union[str, List[str]]] = None, + swe_agent_parser: Optional[str] = None, + swe_agent_max_input_token: Optional[int] = None, + vllm_environment: Optional[str] = None, + vllm_yaml_config: Optional[str] = None, ) -> None: super().__init__( input_repo, @@ -62,31 +77,101 @@ def __init__( self._swe_agent_model_name = swe_agent_model_name self._swe_agent_per_instance_cost_limit = swe_agent_per_instance_cost_limit + 
def _launch_ollama_server(self) -> None:
    """Launch an Ollama server in the background and wait until it answers.

    Uses ``ollama list`` as the readiness probe. The wait is bounded by
    ``_MAX_SERVE_CHECK_ATTEMPTS`` (mirroring the vLLM launcher) so a server
    that crashes on startup cannot hang the translator forever.

    Raises:
        ValueError: If the ``ollama`` executable is not on the PATH.
        RuntimeError: If the server does not become ready in time.
    """
    # Check that ollama is installed.
    if not shutil.which("ollama"):
        raise ValueError("Ollama is not in the path. Please install Ollama and add it to the path.")
    # Early exit if an ollama server is already running.
    if subprocess.run(["ollama", "list"], capture_output=True, text=True).returncode == 0:
        return
    # start_new_session detaches the server from our process group so it
    # survives signals sent to the translator.
    subprocess.Popen(["ollama", "serve"],
                     stdout=subprocess.DEVNULL,
                     stderr=subprocess.STDOUT,
                     stdin=subprocess.DEVNULL,
                     start_new_session=True)
    # Poll until the server is reachable, with a bounded number of attempts
    # (the original loop here was unbounded and could spin forever).
    for _ in range(self._MAX_SERVE_CHECK_ATTEMPTS):
        status = subprocess.run(["ollama", "list"], capture_output=True, text=True)
        if status.returncode == 0:
            print("Ollama server ready.")
            return
        print(f"Ollama server not ready, checking again after {self.SERVE_CHECK_COOLDOWN} seconds...")
        time.sleep(self.SERVE_CHECK_COOLDOWN)
    raise RuntimeError("Ollama server did not become ready in time.")
Please install Ollama and add it to the path.") + # Early exit if ollama is already running + if subprocess.run(["ollama", "list"], capture_output=True, text=True).returncode == 0: + return + ollama_command = ["ollama", "serve"] + subprocess.Popen(ollama_command, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + stdin=subprocess.DEVNULL, + start_new_session=True) + # Check that the server is running + checking = True + while checking: + status = subprocess.run(["ollama", "list"], capture_output=True, text=True) + if status.returncode == 0: + checking = False + else: + print(f"Ollama server not ready, checking again after {self.SERVE_CHECK_COOLDOWN} seconds...") + time.sleep(self.SERVE_CHECK_COOLDOWN) + print(f"Ollama server ready.") + return + @staticmethod def add_args(parser: Any) -> None: """Add command line arguments for SWE-agent configuration.""" parser.add_argument("--swe-agent-model-name", type=str, - help="Name of the agent model to use (e.g. 'gpt-4o').") + help="Name of the agent model to use (e.g. 'gpt-4o', 'ollama/llama3.2:latest').") parser.add_argument("--swe-agent-per-instance-cost-limit", type=float, - help="Per-instance cost limit for the agent model.") - + help="Per-instance cost limit for the agent model; set to 0 for local models.") + parser.add_argument("--swe-agent-config", action="append", + help="May be specified multiple times; default config file is used if none is provided.") + parser.add_argument("--swe-agent-parser", type=str, choices=["thought_action", "function_calling"], + help="Parsing strategy. Use 'thought_action' for local/Ollama models.") + parser.add_argument("--swe-agent-max-input-token", type=int, + help="Override max input tokens to avoid local-model warnings.") + parser.add_argument("--vllm-environment", type=str, + help="Path to the Python environment that has vLLM installed (e.g. 
~/pssg-venv).") + parser.add_argument("--vllm-yaml-config", type=str, + help="Path to vLLM YAML config file to pass via --config.") @staticmethod def parse_args(args: Any) -> Dict[str, Any]: """Parse command line arguments for SWE-agent configuration.""" return { "swe_agent_model_name": args.swe_agent_model_name, - "swe_agent_per_instance_cost_limit": args.swe_agent_per_instance_cost_limit + "swe_agent_per_instance_cost_limit": args.swe_agent_per_instance_cost_limit, + "swe_agent_config": args.swe_agent_config, + "swe_agent_parser": args.swe_agent_parser, + "swe_agent_max_input_token": args.swe_agent_max_input_token, + "vllm_environment": args.vllm_environment, + "vllm_yaml_config": args.vllm_yaml_config, } - def translate(self) -> None: """Execute the complete translation process using SWE-agent. @@ -108,6 +193,7 @@ def _execute_translation_workflow(self) -> None: self.initialize_temp_repo() if self.run_swe_agent(): + self._fix_makefile_tabs_and_duplicates() print("Saving translated output...") self.save_output(self._output_path) self.remove_unnecessary_output_files() @@ -138,12 +224,13 @@ def _create_translation_task_content(self) -> str: f"You are a helpful coding assistant. You are helping a software developer translate a " f"codebase from the {self._src_model} execution model to the {self._dst_model} execution " f"model.\n\n" - f"The codebase is called {data['app']}. Its path is {data['path']}. Given this code " + f"The codebase is called {data['app']}. Its path is {self.CONTAINER_REPO_PATH}. Given this code " f"repository, translate the {data['app']} codebase's {self._src_model}-specific files to " f"the {self._dst_model} execution model.\n\n" f"The new files should be in {data['filename_desc']} and all old {self._src_model} files " - f"must be deleted. A new {data['build_filename']} should be made to compile accordingly " - f"with the new files.\n\n" + f"must be deleted. 
You may use standard command-line tools (e.g., the `rm` command) to " + f"remove obsolete {self._src_model}-specific files. A new {data['build_filename']} should " + f"be made to compile accordingly with the new files.\n\n" f"Ensure that the user can compile this code using, for example, `{data['ex_build_cmd']}` " f"to build the code for {data['ex_build_desc']}. Ensure also that the command line " f"interface after translation still works as expected, so that, for example, " @@ -175,7 +262,6 @@ def _initialize_git_repo(self) -> None: subprocess.run(self.GIT_ADD_ALL, cwd=self._temp_repo_path, check=True) subprocess.run(self.GIT_COMMIT_INITIAL, cwd=self._temp_repo_path, check=True) - def run_swe_agent(self) -> bool: """Run the SWE-agent command and apply the resulting patch.""" command = self._build_swe_agent_command() @@ -197,15 +283,26 @@ def run_swe_agent(self) -> bool: def _build_swe_agent_command(self) -> List[str]: """Build the SWE-agent command with all required parameters.""" - return [ + cmd = [ "sweagent", "run", - f"--agent.model.name={self._swe_agent_model_name}", - f"--agent.model.per_instance_cost_limit={self._swe_agent_per_instance_cost_limit}", f"--env.repo.path={self._temp_repo_path}", - "--env.deployment.image=python", f"--problem_statement.path={self._translation_task_path}", ] + if self._swe_agent_model_name: + cmd.append(f"--agent.model.name={self._swe_agent_model_name}") + if self._swe_agent_per_instance_cost_limit: + cmd.append(f"--agent.model.per_instance_cost_limit={self._swe_agent_per_instance_cost_limit}") + if self._swe_agent_parser: + cmd.append(f"--agent.tools.parse_function.type={self._swe_agent_parser}") + if self._swe_agent_max_input_token: + cmd.append(f"--agent.model.max_input_tokens={self._swe_agent_max_input_token}") + if self._swe_agent_config: + for cfg in self._swe_agent_config: + cmd.extend(["--config", cfg]) + + return cmd + def _apply_swe_agent_patch(self) -> bool: """Find and apply the patch file generated by SWE-agent.""" 
print("Applying patch...") @@ -283,6 +380,45 @@ def _remove_files_by_extension(self, directory: str, extensions: tuple) -> None: file_path = os.path.join(root, file) os.remove(file_path) + def _fix_makefile_tabs_and_duplicates(self) -> None: + makefile = Path(self._temp_repo_path) / "Makefile" + if not makefile.exists(): + return + + lines = makefile.read_text(encoding="utf-8", errors="replace").splitlines(True) + + # 1) Remove exact duplicate lines (preserve order) + print("Removing duplicate lines in the Makefile...") + seen = set() + duplicates = [] + for line in lines: + if line not in seen: + seen.add(line) + duplicates.append(line) + lines = duplicates + + # 2) Enforce Makefile tab rules + print("Fixing Makefile tabs...") + i = 0 + while i < len(lines) - 1: + curr = lines[i].lstrip() + nxt = lines[i + 1] + + is_rule = ":" in curr + is_conditional = curr.startswith(( + "ifeq", + "ifneq", + "ifdef", + "ifndef", + "else" + )) + + if is_rule or is_conditional: + if nxt.strip() and not nxt.startswith("\t") and not nxt.lstrip().startswith("#"): + lines[i + 1] = "\t" + nxt + i += 1 + + makefile.write_text("".join(lines), encoding="utf-8") def write_experiment_metadata(self) -> None: """Write experiment metadata to a JSON file in the output directory.""" @@ -327,3 +463,44 @@ def cleanup_temp_repo(self) -> None: except OSError as e: print(f"Error cleaning up temporary repository: {e}") # Don't raise here as this is cleanup code + + def _launch_vllm_server(self, environment_path: str, yaml_config: Optional[str] = None): + """Launch a vLLM server in the background using the Python environment directory + provided. 
+ """ + # Early exit if vLLM server is already running + if subprocess.run(["curl", "http://127.0.0.1:8000/health"], capture_output=True, + text=True, check=False).returncode == 0: + return None + py_executable = os.path.join(environment_path, "bin", "python") + vllm_command = [ + py_executable, "-m", "vllm.entrypoints.openai.api_server", + "--tool-call-parser", "openai", + "--enable-auto-tool-choice", + "--reasoning-parser", "openai_gptoss", + "--host", "127.0.0.1", + "--port", "8000", + ] + vllm_api_key = os.getenv("VLLM_API_KEY") + if self._swe_agent_model_name is not None: + vllm_command.extend(["--model", self._swe_agent_model_name]) + if vllm_api_key is not None: + vllm_command.extend(["--api-key", vllm_api_key]) + if yaml_config: + vllm_command.extend(["--config", yaml_config]) + print("Full vLLM subprocess command:", " ".join(vllm_command)) + vllm_server = subprocess.Popen(vllm_command) + # Ping the server until it is ready at the health endpoint + checking, num_attempts = True, 0 + while checking and num_attempts < self._MAX_SERVE_CHECK_ATTEMPTS: + status = subprocess.run(["curl", "http://127.0.0.1:8000/health"], capture_output=True, + text=True, check=False) + if status.returncode == 0: + checking = False + else: + print(f"VLLM server not ready, checking again after {self.SERVE_CHECK_COOLDOWN} seconds...") + time.sleep(self.SERVE_CHECK_COOLDOWN) + num_attempts += 1 + atexit.register(vllm_server.terminate) + print("VLLM server ready.") + return vllm_server diff --git a/targets/XSBench/kokkos/target.json b/targets/XSBench/kokkos/target.json index bbade92..446358c 100644 --- a/targets/XSBench/kokkos/target.json +++ b/targets/XSBench/kokkos/target.json @@ -2,7 +2,7 @@ "app": "xsbench", "model": "kokkos", "path": "targets/XSBench/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . 
&& cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_timeout": 120, diff --git a/targets/llm.c/kokkos/target.json b/targets/llm.c/kokkos/target.json index f92feb8..f6ef533 100644 --- a/targets/llm.c/kokkos/target.json +++ b/targets/llm.c/kokkos/target.json @@ -2,7 +2,7 @@ "app": "llm.c", "model": "kokkos", "path": "targets/llm.c/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "setup_commands": ["cp $SCRATCH/llmc_inputs/*.bin ."], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", diff --git a/targets/microXOR/cuda/repo/translation_task.md b/targets/microXOR/cuda/repo/translation_task.md deleted file mode 100644 index ac9db36..0000000 --- a/targets/microXOR/cuda/repo/translation_task.md +++ /dev/null @@ -1,7 +0,0 @@ -You are a helpful coding assistant. You are helping a software developer translate a codebase from the cuda execution model to the openmp-offload execution model. - -The codebase is called microxor. Its path is targets/microXOR/openmp-offload/repo. Given this code repository, translate the microxor codebase's cuda-specific files to the openmp-offload execution model. - -The new files should be in C++ and all old cuda files must be deleted. A new Makefile should be made to compile accordingly with the new files. - -Ensure that the user can compile this code using, for example, `make SM_VERSION=sm_80 CXX_COMPILER=clang++` to build the code for a system with an NVIDIA GPU with compute capability 80 compiled with clang++. 
Ensure also that the command line interface after translation still works as expected, so that, for example, `./microXOR.exe 1024 32` still works to run the code with a 1024 by 1024 input matrix and a kernel with 32 times 32 threads per block. \ No newline at end of file diff --git a/targets/microXOR/kokkos/target.json b/targets/microXOR/kokkos/target.json index bdd37b2..f4f072a 100644 --- a/targets/microXOR/kokkos/target.json +++ b/targets/microXOR/kokkos/target.json @@ -2,7 +2,7 @@ "app": "microxor", "model": "kokkos", "path": "targets/microXOR/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_timeout": 120, diff --git a/targets/microXORh/kokkos/target.json b/targets/microXORh/kokkos/target.json index bc2f65d..d4fd087 100644 --- a/targets/microXORh/kokkos/target.json +++ b/targets/microXORh/kokkos/target.json @@ -2,7 +2,7 @@ "app": "microxorh", "model": "kokkos", "path": "targets/microXORh/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . 
&& cmake --build build/", "build_timeout": 120, diff --git a/targets/nanoXOR/kokkos/target.json b/targets/nanoXOR/kokkos/target.json index ab42118..62decf1 100644 --- a/targets/nanoXOR/kokkos/target.json +++ b/targets/nanoXOR/kokkos/target.json @@ -2,7 +2,7 @@ "app": "nanoxor", "model": "kokkos", "path": "targets/nanoXOR/kokkos/repo", - "dependencies": ["gnu", "cuda", "kokkos"], + "dependencies": ["gnu", "cuda", "kokkos", "ninja"], "build_commands_debug": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_commands_perf": "cmake -DKOKKOS_BACKEND=CUDA -DCMAKE_CXX_COMPILER=g++ -GNinja -Bbuild . && cmake --build build/", "build_timeout": 120,