massgen · int-chaos · Mar 1, 2026 · Mar 1, 2026 · Mar 1, 2026 · Mar 5, 2026
diff --git a/docs/source/user_guide/cloud.rst b/docs/source/user_guide/cloud.rst
@@ -0,0 +1,31 @@
+Cloud
+=====
+
+Overview
+--------
+
+MassGen Cloud allows users to run a MassGen job in the cloud.
+
+Currently, MassGen supports:
+
+- running jobs in the Modal cloud.
+- single-agent jobs.
+
+Quick Start
+-----------
+To start using MassGen Cloud, you need to have a Modal account and install the Modal CLI.
+
+.. code-block:: bash
+
+   pip install modal
+   modal setup
+   modal secret create massgen-env --from-dotenv .env
+
+To run a MassGen job in the cloud, use the ``--cloud`` flag:
+
+.. code-block:: bash
+
+   massgen --cloud --config config.yaml "Your question"
+
+MassGen will upload the config file, context paths, prompt, and any other necessary files to the cloud and run the job there. You can monitor the progress in the local terminal and view the results when the job is complete.
+
+Results and logs will be saved to the local directory ``.massgen/cloud_jobs/job_{job_id}/artifacts/``.
diff --git a/massgen/cli.py b/massgen/cli.py
@@ -460,6 +460,84 @@ def record_event(event):
 )
 
 
+def _run_cloud_job(args: argparse.Namespace, config: dict[str, Any], config_path_label: str | None) -> None:
+    """Launch a MassGen run in Modal cloud and materialize results locally.
+
+    Works on a deep copy of ``config`` so the caller's dictionary is left
+    untouched. Context paths found under ``orchestrator`` and under each
+    agent's ``backend`` are packaged via ``process_context_paths`` and replaced
+    with the paths it returns. The final answer is written to
+    ``args.output_file`` when given, otherwise to ``final_answer.txt`` inside
+    the job's artifacts directory.
+
+    Args:
+        args: Parsed CLI arguments; ``question``, ``cloud_timeout``,
+            ``output_file`` and ``automation`` are read.
+        config: Loaded MassGen configuration.
+        config_path_label: Label of the config source, reported as
+            ``CLOUD_CONFIG_SOURCE`` in automation mode; omitted when falsy.
+
+    Raises:
+        ConfigurationError: If ``args.question`` is empty.
+
+    Exceptions raised by the launcher are not caught here.
+    """
+    if not args.question:
+        raise ConfigurationError("--cloud requires a question argument")
+
+    import uuid
+
+    import yaml
+
+    from .cloud.cloud_job import CloudJobRequest
+    from .cloud.modal_launcher import ModalCloudJobLauncher
+    from .cloud.utils import process_context_paths
+
+    config_copy = copy.deepcopy(config)
+
+    cloud_job_id = uuid.uuid4().hex[:8]
+
+    def _rewrite_context_paths(section: Any) -> None:
+        """Package ``section['context_paths']`` and store the rewritten paths in place."""
+        # A YAML key with no value loads as None; skip anything that is not a mapping.
+        if not isinstance(section, dict):
+            return
+        local_paths = section.get("context_paths")
+        if not local_paths:
+            return
+        remote_paths = process_context_paths(local_paths, cloud_job_id=cloud_job_id)
+        # Keep the original entries when the helper returns nothing.
+        if remote_paths:
+            section["context_paths"] = remote_paths
+
+    # Package context path files and rewrite config paths for remote
+    _rewrite_context_paths(config_copy.get("orchestrator"))
+
+    # NOTE(review): only the plural ``agents`` list is scanned. If configs may
+    # use a singular ``agent`` entry, its backend context paths are not
+    # rewritten here - confirm whether cloud mode needs to handle that.
+    for agent_cfg in config_copy.get("agents") or []:
+        if isinstance(agent_cfg, dict):
+            _rewrite_context_paths(agent_cfg.get("backend"))
+
+    # Cloud validation compatibility for PyPI version
+    # The PyPI version of massgen on Modal fails if display_type is "silent".
+    # Since modal_app.py passes `--automation`, it will be re-set to "silent"
+    # internally *after* validation in the cloud. We remove it here just for validation.
+    # NOTE(review): this drops the whole ``ui`` section, not only
+    # ``display_type`` - confirm that losing the other ui keys is intended.
+    ui_cfg = config_copy.get("ui")
+    if isinstance(ui_cfg, dict) and ui_cfg.get("display_type") == "silent":
+        config_copy.pop("ui", None)
+
+    launcher = ModalCloudJobLauncher()
+    request = CloudJobRequest(
+        prompt=args.question,
+        config_yaml=yaml.safe_dump(config_copy, sort_keys=False),
+        timeout_seconds=args.cloud_timeout,
+        cloud_job_id=cloud_job_id,
+    )
+    result = launcher.launch(request)
+
+    final_answer = result.final_answer
+    if args.output_file:
+        output_path = Path(args.output_file)
+    else:
+        output_path = result.artifacts_dir / "final_answer.txt"
+    # Create the target directory in both cases so the write cannot fail on a
+    # missing parent.
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(final_answer, encoding="utf-8")
+
+    # Always print location in automation mode for machine parsing.
+    if args.automation:
+        _automation_print(f"OUTPUT_FILE: {output_path.resolve()}")
+        _automation_print(f"CLOUD_ARTIFACTS_DIR: {result.artifacts_dir.resolve()}")
+        if result.local_log_dir:
+            _automation_print(f"LOG_DIR: {result.local_log_dir.resolve()}")
+        if result.local_events_path:
+            _automation_print(f"EVENTS_FILE: {result.local_events_path.resolve()}")
+        if config_path_label:
+            _automation_print(f"CLOUD_CONFIG_SOURCE: {config_path_label}")
+    else:
+        print(final_answer)
+
+
 def _build_coordination_ui(ui_config: dict[str, Any]) -> CoordinationUI:
     """Create a CoordinationUI with display_kwargs passthrough (incl. theme)."""
     display_kwargs = dict(ui_config.get("display_kwargs", {}) or {})
@@ -9093,9 +9171,6 @@ def _save_prompt_metadata_failure_fallback(
         # Validate that all context paths exist before proceeding
         validate_context_paths(config)
 
-        # Relocate all filesystem paths to .massgen/ directory
-        relocate_filesystem_paths(config)
-
         # Generate unique instance ID for parallel execution safety
         # This prevents Docker container naming conflicts when running multiple instances
         import uuid
@@ -9432,6 +9507,21 @@ def _save_prompt_metadata_failure_fallback(
                 f"[Spec Mode] Prepended spec creation instructions " f"(target_chunks={plan_target_chunks}, broadcast={broadcast_mode})",
             )
 
+        # Cloud execution path (Modal MVP)
+        if getattr(args, "cloud", False):
+            if not args.automation:
+                logger.info("Cloud mode requires automation output; enabling --automation")
+                args.automation = True
+            _run_cloud_job(
+                args=args,
+                config=config,
+                config_path_label=str(resolved_path) if resolved_path else None,
+            )
+            return
+
+        # Relocate all filesystem paths to .massgen/ directory
+        relocate_filesystem_paths(config)
+
         # For interactive mode without initial question, defer agent creation until first prompt
         # This allows @path references in the first prompt to be included in Docker mounts
         is_interactive_without_question = not args.question and not getattr(
@@ -9854,6 +9944,12 @@ def cleanup_agent(
         _save_prompt_metadata_failure_fallback("timeout_error", failure_error=e)
         sys.exit(EXIT_TIMEOUT)
     except Exception as e:
+        # Keep cloud-specific timeout mapping distinct from generic execution failures.
+        from .cloud.modal_launcher import CloudJobError
+
+        if isinstance(e, CloudJobError) and "timeout" in str(e).lower():
+            print(f"❌ Timeout error: {e}", flush=True)
+            sys.exit(EXIT_TIMEOUT)
-        # Keep cloud-specific timeout mapping distinct from generic execution failures.
-        from .cloud.modal_launcher import CloudJobError
-
-        if isinstance(e, CloudJobError) and "timeout" in str(e).lower():
-            print(f"❌ Timeout error: {e}", flush=True)
-            sys.exit(EXIT_TIMEOUT)
+        # Keep cloud-specific timeout mapping distinct from generic execution failures.
+        try:
+            from .cloud.modal_launcher import CloudJobError
+        except Exception:  # Don't mask the original failure.
+            CloudJobError = None
+
+        if CloudJobError and isinstance(e, CloudJobError) and "timeout" in str(e).lower():
+            print(f"❌ Timeout error: {e}", flush=True)
+            sys.exit(EXIT_TIMEOUT)
-        # Keep cloud-specific timeout mapping distinct from generic execution failures.
-        from .cloud.modal_launcher import CloudJobError
-
-        if isinstance(e, CloudJobError) and "timeout" in str(e).lower():
-            print(f"❌ Timeout error: {e}", flush=True)
-            sys.exit(EXIT_TIMEOUT)
+        # Keep cloud-specific timeout mapping distinct from generic execution failures.
+        try:
+            from .cloud.modal_launcher import CloudJobError
+        except Exception:  # Don't mask the original failure.
+            CloudJobError = None
+
+        if CloudJobError and isinstance(e, CloudJobError) and "timeout" in str(e).lower():
+            print(f"❌ Timeout error: {e}", flush=True)
+            sys.exit(EXIT_TIMEOUT)
         print(f"❌ Error: {e}", flush=True)
         _save_prompt_metadata_failure_fallback("execution_error", failure_error=e)
         sys.exit(EXIT_EXECUTION_ERROR)
@@ -10328,6 +10424,17 @@ def cli_main():
         help="Enable automation mode: silent output (~10 lines), status.json tracking, meaningful exit codes. "
         "REQUIRED for LLM agents and background execution. Automatically isolates workspaces for parallel runs.",
     )
+    parser.add_argument(
+        "--cloud",
+        action="store_true",
+        help="Run the job in Modal cloud (MVP: single-agent automation).",
+    )
+    parser.add_argument(
+        "--cloud-timeout",
+        type=int,
+        default=3600,
+        help="Cloud job timeout in seconds (default: 3600).",
+    )
     parser.add_argument(
         "--stream-events",
         action="store_true",
@@ -10585,6 +10692,9 @@ def cli_main():
     if args.plan_chunks is not None and args.plan_chunks <= 0:
         print("❌ --plan-chunks must be a positive integer")
         sys.exit(2)
+    if args.cloud_timeout <= 0:
+        print("❌ --cloud-timeout must be a positive integer")
+        sys.exit(2)
 
     # Validate mode flag combinations
     mode_errors = validate_mode_flag_combinations(args)

diff --git a/massgen/cloud/__init__.py b/massgen/cloud/__init__.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+"""Cloud execution utilities for MassGen."""
+
+from massgen.cloud.cloud_job import (
+    CloudJobError,
+    CloudJobLauncher,
+    CloudJobRequest,
+    CloudJobResult,
+)
+from massgen.cloud.modal_launcher import ModalCloudJobLauncher
+
+__all__ = [
+    "CloudJobError",
+    "CloudJobLauncher",
+    "CloudJobRequest",
+    "CloudJobResult",
+    "ModalCloudJobLauncher",
+]
diff --git a/massgen/cloud/cloud_job.py b/massgen/cloud/cloud_job.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+from dataclasses import dataclass
+from pathlib import Path
+
+
+class CloudJobError(RuntimeError):
+    """Signal that a cloud job failed or produced output that could not be used."""
+
+
+@dataclass
+class CloudJobRequest:
+    """Everything a launcher needs to start one cloud run.
+
+    Attributes:
+        prompt: The question or task text to run.
+        config_yaml: The run configuration serialized as a YAML string.
+        timeout_seconds: Time limit for the job, in seconds.
+        cloud_job_id: Identifier for this job; defaults to an empty string.
+    """
+
+    prompt: str
+    config_yaml: str
+    timeout_seconds: int
+    cloud_job_id: str = ""
+
+
+@dataclass
+class CloudJobResult:
+    """What a launcher hands back once a cloud run has finished.
+
+    Attributes:
+        final_answer: Text of the final answer.
+        artifacts_dir: Local directory holding the job's artifacts.
+        local_log_dir: Local log directory, or ``None`` when there is none.
+        local_events_path: Local events file, or ``None`` when there is none.
+        remote_log_dir: Presumably the log directory on the remote side, or
+            ``None`` - confirm against the launcher implementation.
+    """
+
+    final_answer: str
+    artifacts_dir: Path
+    local_log_dir: Path | None
+    local_events_path: Path | None
+    remote_log_dir: str | None
+
+
+class CloudJobLauncher:
+    """Base class for cloud job launchers.
+
+    Constructing a launcher makes sure its local workspace directory exists;
+    subclasses supply the actual ``launch`` implementation.
+    """
+
+    # Presumably marks the result payload in the remote job's output - confirm
+    # against the concrete launcher.
+    RESULT_MARKER = "__MASSGEN_CLOUD_JOB_RESULT__"
+
+    def __init__(self, workspace_root: Path | None = None):
+        """Store ``workspace_root`` and create it on disk if it is missing.
+
+        Falls back to ``<cwd>/.massgen/cloud_jobs`` when no root is supplied.
+        """
+        if not workspace_root:
+            workspace_root = Path.cwd().joinpath(".massgen", "cloud_jobs")
+        self.workspace_root = workspace_root
+        self.workspace_root.mkdir(parents=True, exist_ok=True)
+
+    def launch(self, request: CloudJobRequest) -> CloudJobResult:
+        """Run ``request`` in the cloud; subclasses must override this."""
+        raise NotImplementedError