From e7efd08bb0dc08e50f7d4b28168fc49ffd1035e7 Mon Sep 17 00:00:00 2001
From: Brett Rosequist <15953734+brosequist@users.noreply.github.com>
Date: Tue, 28 Apr 2026 10:36:07 -0400
Subject: [PATCH 1/2] Add RTX 50-series CUDA support

---
 INSTALL.md                      |  3 ++-
 docker/Dockerfile_rocky         |  3 +--
 docker/Dockerfile_ubuntu        |  3 +--
 docker/build-rocky.sh           |  2 +-
 docker/build-ubuntu.sh          |  2 +-
 segmentationRDS/segmentation.py | 45 ++++++++++++++++++++++-----------
 torch-requirements.txt          |  6 ++---
 7 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index 474b66f..69d323e 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -11,6 +11,7 @@ It is recommended to install the dependencies of mrSegmentation through a virtua
 
 > [!NOTE]  
 > By default, some of the dependencies in the `requirements.txt` file install a CPU-only version of PyTorch. The plugin will run smoothly on the CPU, but for better performances, it is advised to install the GPU-enabled version of PyTorch.
+> The optional `torch-requirements.txt` file installs a GPU-enabled PyTorch build with CUDA 12.8 support; CUDA 12.8 or newer is required to run on NVIDIA RTX 50-series GPUs.
 
 - On Linux:
 ```
@@ -75,4 +76,4 @@ export MESHROOM_PLUGINS_PATH=/path/to/mrSegmentation:$MESHROOM_PLUGINS_PATH
 - On Windows:
 ```
 set MESHROOM_PLUGINS_PATH=/path/to/mrSegmentation;%MESHROOM_PLUGINS_PATH%
-```
\ No newline at end of file
+```
diff --git a/docker/Dockerfile_rocky b/docker/Dockerfile_rocky
index 7120e6e..b83698c 100644
--- a/docker/Dockerfile_rocky
+++ b/docker/Dockerfile_rocky
@@ -28,7 +28,6 @@ RUN python -m venv ${MRPLUGIN_BUNDLE}
 WORKDIR ${MRPLUGIN_BUNDLE}
 
 RUN ${MRPLUGIN_BUNDLE}/bin/pip install -r ${MRPLUGIN_DEV}/requirements.txt
-RUN sed -i -e 's/cu124/cu121/g' ${MRPLUGIN_DEV}/torch-requirements.txt
 RUN ${MRPLUGIN_BUNDLE}/bin/pip install -r ${MRPLUGIN_DEV}/torch-requirements.txt --upgrade --force-reinstall
 
 
@@ -59,4 +58,4 @@ ENV RDS_RECOGNITION_MODEL_PATH="${MRPLUGIN_BUNDLE}/models/ram_plus_swin_large_14
 ## Clean-up
 ## ========
 
-RUN rm -rf share && rm pyvenv.cfg
\ No newline at end of file
+RUN rm -rf share && rm pyvenv.cfg
diff --git a/docker/Dockerfile_ubuntu b/docker/Dockerfile_ubuntu
index 63a343e..f649918 100644
--- a/docker/Dockerfile_ubuntu
+++ b/docker/Dockerfile_ubuntu
@@ -34,7 +34,6 @@ RUN python -m venv ${MRPLUGIN_BUNDLE}
 WORKDIR ${MRPLUGIN_BUNDLE}
 
 RUN ${MRPLUGIN_BUNDLE}/bin/pip install -r ${MRPLUGIN_DEV}/requirements.txt
-RUN sed -i -e 's/cu124/cu121/g' ${MRPLUGIN_DEV}/torch-requirements.txt
 RUN ${MRPLUGIN_BUNDLE}/bin/pip install -r ${MRPLUGIN_DEV}/torch-requirements.txt --upgrade --force-reinstall
 
 
@@ -65,4 +64,4 @@ ENV RDS_RECOGNITION_MODEL_PATH="${MRPLUGIN_BUNDLE}/models/ram_plus_swin_large_14
 ## Clean-up
 ## ========
 
-RUN rm -rf share && rm pyvenv.cfg
\ No newline at end of file
+RUN rm -rf share && rm pyvenv.cfg
diff --git a/docker/build-rocky.sh b/docker/build-rocky.sh
index b5ac3c5..299c88a 100755
--- a/docker/build-rocky.sh
+++ b/docker/build-rocky.sh
@@ -2,7 +2,7 @@
 set -ex
 
 test -z "$MRSEGMENTATION_VERSION" && MRSEGMENTATION_VERSION="$(git rev-parse --abbrev-ref HEAD)-$(git rev-parse --short HEAD)"
-test -z "$CUDA_VERSION" && CUDA_VERSION=12.1.1
+test -z "$CUDA_VERSION" && CUDA_VERSION=12.8.1
 test -z "$ROCKY_VERSION" && ROCKY_VERSION=9
 
 test -d docker || (
diff --git a/docker/build-ubuntu.sh b/docker/build-ubuntu.sh
index 5ad5f46..2aac8f9 100755
--- a/docker/build-ubuntu.sh
+++ b/docker/build-ubuntu.sh
@@ -2,7 +2,7 @@
 set -ex
 
 test -z "$MRSEGMENTATION_VERSION" && MRSEGMENTATION_VERSION="$(git rev-parse --abbrev-ref HEAD)-$(git rev-parse --short HEAD)"
-test -z "$CUDA_VERSION" && CUDA_VERSION=12.1.1
+test -z "$CUDA_VERSION" && CUDA_VERSION=12.8.1
 test -z "$UBUNTU_VERSION" && UBUNTU_VERSION=22.04
 
 test -d docker || (
diff --git a/segmentationRDS/segmentation.py b/segmentationRDS/segmentation.py
index 8fb264c..e5f71c3 100644
--- a/segmentationRDS/segmentation.py
+++ b/segmentationRDS/segmentation.py
@@ -44,12 +44,78 @@ def cleanstr(s: str) -> str:
     return sclean
 
 
+def _cuda_arch_supported(major: int, minor: int, arch_list) -> bool:
+    """Tell whether a PyTorch build can run kernels on a CUDA device.
+
+    Args:
+        major: Major compute capability of the device.
+        minor: Minor compute capability of the device.
+        arch_list: Architecture names as returned by
+            ``torch.cuda.get_arch_list()`` (e.g. ``sm_86``, ``compute_120``).
+
+    Returns:
+        False only when the list names CUDA architectures and none of them can
+        run on the device; True otherwise, so an empty or unrecognised list
+        never vetoes the GPU.
+    """
+    capability = major * 10 + minor
+    recognised = False
+    for arch in arch_list:
+        kind, _, version = arch.partition('_')
+        # Drop the arch-/family-specific suffix ("sm_90a", "sm_100f").
+        version = version.rstrip('af')
+        if kind not in ('sm', 'compute') or not version.isdigit():
+            # Not a CUDA arch name (e.g. a ROCm "gfx..." target): no opinion.
+            continue
+        recognised = True
+        arch_capability = int(version)
+        if arch_capability > capability:
+            continue
+        # PTX ("compute_XY") is JIT-compiled for any newer device, whereas a
+        # cubin ("sm_XY") only runs on devices of the same major version, which
+        # is how e.g. sm_86 kernels serve sm_89 (RTX 40-series) GPUs.
+        if kind == 'compute' or arch_capability // 10 == major:
+            return True
+    return not recognised
+
+
+def get_device(useGPU: bool) -> str:
+    """Select the torch device name to run the models on.
+
+    Args:
+        useGPU: Whether GPU execution is requested.
+
+    Returns:
+        'cuda' when requested, available and supported by the installed
+        PyTorch build for the current device; 'cpu' otherwise. Every fallback
+        from a requested GPU is reported on stdout.
+    """
+    if not useGPU:
+        return 'cpu'
+    if not torch.cuda.is_available():
+        print("Cannot execute on GPU, fallback to CPU execution mode")
+        return 'cpu'
+
+    device_index = torch.cuda.current_device()
+    major, minor = torch.cuda.get_device_capability(device_index)
+    supported_arches = torch.cuda.get_arch_list()
+    if not _cuda_arch_supported(major, minor, supported_arches):
+        device_name = torch.cuda.get_device_name(device_index)
+        print(
+            f"CUDA device '{device_name}' requires sm_{major}{minor}, but this PyTorch "
+            f"build supports: {', '.join(supported_arches)}. "
+            "Fallback to CPU execution mode. For RTX 50-series GPUs, install a "
+            "PyTorch CUDA 12.8+ build."
+        )
+        return 'cpu'
+
+    return 'cuda'
+
+
 class SegmentationRDS:
 
     def __init__(self, RAM_CHECKPOINT_PATH:str, GD_CONFIG_PATH:str, GD_CHECKPOINT_PATH:str, SAM_CHECKPOINT_PATH:str, RAM_VIT:str='swin_l', RAM_IMAGE_SIZE:int=384, SAM_ENCODER_VERSION:str='vit_h', useGPU:bool=True):
-        self.DEVICE = 'cuda' if useGPU and torch.cuda.is_available() else 'cpu'
-        if self.DEVICE == 'cpu' and useGPU:
-            print("Cannot execute on GPU, fallback to CPU execution mode")
+        self.DEVICE = get_device(useGPU)
         self.RAM_IMAGE_SIZE = RAM_IMAGE_SIZE
         # Load models
         # Recognize Anything
@@ -191,9 +214,7 @@ def process(self, image: np.ndarray, prompt: str, synonyms: str = '', invert: bo
 class SegmentAnything:
 
     def __init__(self, SAM_CHECKPOINT_PATH:str, SAM_ENCODER_VERSION:str='vit_h', useGPU:bool=True):
-        self.DEVICE = 'cuda' if useGPU and torch.cuda.is_available() else 'cpu'
-        if self.DEVICE == 'cpu' and useGPU:
-            print("Cannot execute on GPU, fallback to CPU execution mode")
+        self.DEVICE = get_device(useGPU)
         # Load models
         sam = sam_model_registry[SAM_ENCODER_VERSION](checkpoint=SAM_CHECKPOINT_PATH)
         sam.to(self.DEVICE)
@@ -262,9 +283,7 @@ def process(self, image: np.ndarray, bboxes = [], clicksIn: np.ndarray = [], cli
 class RecognizeAnything:
 
     def __init__(self, RAM_CHECKPOINT_PATH:str, RAM_VIT:str='swin_l', RAM_IMAGE_SIZE:int=384, useGPU:bool=True):
-        self.DEVICE = 'cuda' if useGPU and torch.cuda.is_available() else 'cpu'
-        if self.DEVICE == 'cpu' and useGPU:
-            print("Cannot execute on GPU, fallback to CPU execution mode")
+        self.DEVICE = get_device(useGPU)
         self.RAM_IMAGE_SIZE = RAM_IMAGE_SIZE
         # Load models
         # Recognize Anything
@@ -294,9 +313,7 @@ def get_tags(self, image:np.ndarray) -> list[str]:
 class DetectAnything:
 
     def __init__(self, RAM_CHECKPOINT_PATH:str, GD_CONFIG_PATH:str, GD_CHECKPOINT_PATH:str, RAM_VIT:str='swin_l', RAM_IMAGE_SIZE:int=384, useGPU:bool=True):
-        self.DEVICE = 'cuda' if useGPU and torch.cuda.is_available() else 'cpu'
-        if self.DEVICE == 'cpu' and useGPU:
-            print("Cannot execute on GPU, fallback to CPU execution mode")
+        self.DEVICE = get_device(useGPU)
         self.RAM_IMAGE_SIZE = RAM_IMAGE_SIZE
         # Load models
         # Recognize Anything
@@ -420,9 +437,7 @@ class BiRefNetSeg:
     def __init__(self, modelType:str, useGPU:bool=True):
         from birefnet.models.birefnet import BiRefNet
 
-        self.DEVICE = 'cuda' if useGPU and torch.cuda.is_available() else 'cpu'
-        if self.DEVICE == 'cpu' and useGPU:
-            print("Cannot execute on GPU, fallback to CPU execution mode")
+        self.DEVICE = get_device(useGPU)
         # Load models
         pretrainedModel = 'ZhengPeng7/BiRefNet_HR-matting'
         if modelType == 'BiRefNet LR':
diff --git a/torch-requirements.txt b/torch-requirements.txt
index 8f514f8..739dc2a 100644
--- a/torch-requirements.txt
+++ b/torch-requirements.txt
@@ -1,3 +1,3 @@
--i https://download.pytorch.org/whl/cu124
-torch==2.5.1
-torchvision==0.20.*
\ No newline at end of file
+-i https://download.pytorch.org/whl/cu128
+torch==2.7.1
+torchvision==0.22.1

From 4eafdfdf5e1c2e799b3186dfa20b47b783a6a1e3 Mon Sep 17 00:00:00 2001
From: Brett Rosequist <15953734+brosequist@users.noreply.github.com>
Date: Thu, 28 May 2026 23:38:05 -0400
Subject: [PATCH 2/2] Bump torch to 2.12.0 / torchvision to 0.27.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Original PR (2026-04-29) pinned torch 2.7.1 / torchvision 0.22.1 — the
latest CUDA-12.8 wheels available at the time. Bumping to current stable
to keep the RTX 50 / Blackwell GPU support work current. CUDA 12.8 wheel
index unchanged.

Upstream main has moved 24 commits since the PR's merge base but none of
them touched torch-requirements.txt, Dockerfile_*, build-*.sh, INSTALL.md,
or segmentationRDS/segmentation.py — the rebase remains clean.
---
 torch-requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torch-requirements.txt b/torch-requirements.txt
index 739dc2a..f1b7ace 100644
--- a/torch-requirements.txt
+++ b/torch-requirements.txt
@@ -1,3 +1,3 @@
 -i https://download.pytorch.org/whl/cu128
-torch==2.7.1
-torchvision==0.22.1
+torch==2.12.0
+torchvision==0.27.0
\ No newline at end of file