runhey · ytipocmk631 · Mar 20, 2026 · Mar 20, 2026 · Mar 21, 2026
diff --git a/deploy/config.py b/deploy/config.py
@@ -38,6 +38,7 @@ class ConfigModel:
     StartOcrServer: bool = False
     OcrServerPort: int = 22268
     OcrClientAddress: str = "127.0.0.1:22268"
+    OcrEngine: str = "ppocr"
 
     # Update
     EnableReload: bool = True

diff --git a/deploy/template b/deploy/template
@@ -80,6 +80,10 @@ Deploy:
     # Address of ocr server for alas instance to connect
     # [Default] 127.0.0.1:22268
     OcrClientAddress: 127.0.0.1:22268
+    # Specify the OCR engine/backend to be used for text recognition
+    # Supported options: ppocr, onnxocr
+    # [Default] ppocr
+    OcrEngine: ppocr
 
   Update:
     # Use auto update and builtin updater feature

diff --git a/module/ocr/models.py b/module/ocr/models.py
@@ -1,11 +1,24 @@
 from module.base.decorator import cached_property
-from module.ocr.ppocr import TextSystem
+from module.server.setting import State
 
 
 class OcrModel:
     @cached_property
     def ch(self):
-        return TextSystem()
+        """Build the OCR backend selected by the ``OcrEngine`` deploy setting.
+
+        Backend modules are imported only when selected.
+        :raises ValueError: if the setting names no supported engine.
+        """
+        engine = State.deploy_config.OcrEngine.lower()
+        if "ppocr" in engine:
+            from module.ocr.ppocr import TextSystem
+            return TextSystem()
+        if "onnxocr" in engine:
+            from module.ocr.oxocr import ONNXPaddleOcr
+            return ONNXPaddleOcr()
+        # Fail loudly: a cached None would only surface later as an AttributeError.
+        raise ValueError(f"Unsupported OcrEngine: {State.deploy_config.OcrEngine!r}")
 
 
 OCR_MODEL = OcrModel()
@@ -17,7 +30,7 @@ def ch(self):
     import cv2
     import time
     from memory_profiler import profile
-    image = cv2.imread(r"E:\Project\OnmyojiAutoScript-assets\jade.png")
+    image = cv2.imread(r"D:\2025-10-08_01-32-05-371817.png")
 
     # 引入ocr 会导致非常巨大的内存开销
     @profile

diff --git a/module/ocr/oxocr.py b/module/ocr/oxocr.py
@@ -0,0 +1,129 @@
+import base64
+import pickle
+from typing import List
+
+import cv2
+import numpy as np
+from onnxocr import onnx_paddleocr
+
+
+class BoxedResult(object):
+    """One recognized text region: its box, associated image, text and score."""
+    box: np.ndarray
+    text_img: np.ndarray
+    ocr_text: str
+    score: float
+
+    def __init__(self, box, text_img, ocr_text, score):
+        self.box = box
+        self.text_img = text_img
+        self.ocr_text = ocr_text
+        self.score = score
+
+    def __str__(self):
+        return f'BoxedResult[{self.ocr_text}, {self.score}]'
+
+    def __repr__(self):
+        return self.__str__()
+
+    def to_dict(self):
+        """Return a dict of plain values; ``text_img`` is pickled then base64-encoded."""
+        text_img = self.text_img
+        if text_img is not None:
+            text_img = base64.b64encode(pickle.dumps(text_img)).decode('utf-8')
+        return {
+            # np.asarray lets a box given as a plain list serialize as well.
+            'box': np.asarray(self.box).tolist(),
+            'text_img': text_img,
+            'ocr_text': self.ocr_text,
+            'score': self.score,
+        }
+
+    @classmethod
+    def from_dict(cls, data):
+        """Rebuild a BoxedResult from the dict produced by :meth:`to_dict`."""
+        text_img = data['text_img']
+        if text_img is not None:
+            # SECURITY: pickle.loads can execute arbitrary code; only pass trusted data.
+            text_img = pickle.loads(base64.b64decode(text_img.encode('utf-8')))
+        return cls(box=np.array(data['box']), text_img=text_img,
+                   ocr_text=data['ocr_text'], score=data['score'])
+
+
+class ONNXPaddleOcr(onnx_paddleocr.ONNXPaddleOcr):
+    """onnxocr backend exposing the detect/recognize helpers used by this project."""
+
+    def __init__(self,
+                 use_gpu=False,
+                 gpu_mem=500,
+                 gpu_id=0,
+                 use_tensorrt=False,
+                 precision="fp32",
+                 drop_score=0.5,
+                 use_angle_cls=True,
+                 ):
+        """Forward the inference options unchanged to the onnxocr base class."""
+        super().__init__(
+            use_gpu=use_gpu,
+            gpu_mem=gpu_mem,
+            gpu_id=gpu_id,
+            use_tensorrt=use_tensorrt,
+            precision=precision,
+            drop_score=drop_score,
+            use_angle_cls=use_angle_cls,
+        )
+
+    @staticmethod
+    def _prepare_ocr_image(img: np.ndarray, use_grayscale: bool=True) -> np.ndarray:
+        """Return ``img`` as 3-channel grayscale; unchanged when ``use_grayscale`` is false."""
+        if not use_grayscale:
+            print("Using original image for single line OCR")
+            return img
+        if img.ndim == 2:
+            gray = img
+        elif img.ndim == 3 and img.shape[2] == 1:
+            gray = img[:, :, 0]
+        else:
+            gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+        return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
+
+    def detect_and_ocr(self, img: np.ndarray, drop_score=None):
+        """
+        Detect text boxes and recognize the text inside them.
+
+        :param img: Input image in RGB format.
+        :param drop_score: Minimum score to keep a result. If None, uses the object's drop_score.
+        :return: List of BoxedResult. Note that each ``text_img`` is the whole
+            preprocessed image, not a crop of the individual box.
+        """
+        img = self._prepare_ocr_image(img)
+        pages = self.ocr(img, det=True, rec=True, cls=True)
+        # Guard both "no pages" and an empty/None first page.
+        if not pages or not pages[0]:
+            return []
+        if drop_score is None:
+            drop_score = self.drop_score
+        res = []
+        for box, (text, score) in pages[0]:
+            if score >= drop_score:
+                res.append(BoxedResult(np.asarray(box), img, text, score))
+        return res
+
+    def ocr_lines(self, img_list: List[np.ndarray]):
+        """Recognize pre-cropped text images without running detection.
+
+        Images at least 1.5x as tall as wide are rotated 90 degrees first.
+        """
+        prepared = []
+        for img in img_list:
+            img = self._prepare_ocr_image(img)
+            img_height, img_width = img.shape[0:2]
+            if img_height * 1.0 / img_width >= 1.5:
+                img = np.rot90(img)
+            prepared.append(img)
+        return self.text_recognizer(prepared)
+
+    def ocr_single_line(self, img):
+        """Recognize one text image; return its result, or None if there is none."""
+        res = self.ocr_lines([img])
+        return res[0] if res else None
diff --git a/requirements-in.txt b/requirements-in.txt
@@ -13,6 +13,7 @@ pywin32==306
 # OCR
 ppocr-onnx==0.0.3.9
 cn2an==0.5.23
+onnxocr==2025.5
 
 # Web
 paho-mqtt==1.6.1

diff --git a/requirements.txt b/requirements.txt
@@ -4,6 +4,8 @@
 #
 #    pip-compile --annotation-style=line --output-file=requirements.txt requirements-in.txt
 #
+--index-url https://pypi.tuna.tsinghua.edu.cn/simple
+
 adbutils==0.11.0          # via -r requirements-in.txt, uiautomator2
 annotated-types==0.7.0    # via pydantic
 anyio==3.7.1              # via fastapi, starlette
@@ -41,29 +43,34 @@ markdown-it-py==2.2.0     # via rich
 mdurl==0.1.2              # via markdown-it-py
 mpmath==1.3.0             # via sympy
 msgpack==1.0.7            # via zerorpc
-numpy==1.24.3             # via -r requirements-in.txt, onnxruntime, opencv-python, ppocr-onnx, shapely
+numpy==1.24.3             # via -r requirements-in.txt, onnxocr, onnxruntime, opencv-contrib-python, opencv-python, opencv-python-headless, ppocr-onnx, shapely
 oas-checkin-biggod==0.0.1  # via -r requirements-in.txt
 oashya==0.0.7             # via -r requirements-in.txt
 onepush==1.3.0            # via -r requirements-in.txt
-onnxruntime==1.16.3       # via ppocr-onnx
+onnxocr==2025.5           # via -r requirements-in.txt
+onnxruntime==1.16.3       # via onnxocr, ppocr-onnx
+opencv-contrib-python==4.11.0.86  # via onnxocr
 opencv-python==4.7.0.72   # via -r requirements-in.txt, ppocr-onnx
+opencv-python-headless==4.11.0.86  # via onnxocr
 packaging==20.9           # via deprecation, onnxruntime, uiautomator2
 paho-mqtt==1.6.1          # via -r requirements-in.txt
-pillow==10.2.0            # via ppocr-onnx, uiautomator2
+pdf2image==1.17.0         # via onnxocr
+pillow==10.2.0            # via pdf2image, ppocr-onnx, uiautomator2
 ppocr-onnx==0.0.3.9       # via -r requirements-in.txt
 proces==0.1.7             # via cn2an
 progress==1.6             # via uiautomator2
 prompt-toolkit==3.0.52    # via frida-tools
 protobuf==4.25.1          # via onnxruntime
 psutil==6.1.1             # via -r requirements-in.txt
 py==1.11.0                # via retry
-pyclipper==1.3.0.post5    # via ppocr-onnx
+pyclipper==1.3.0.post5    # via onnxocr, ppocr-onnx
 pycparser==2.21           # via cffi
 pycryptodome==3.21.0      # via onepush
 pydantic==2.10.0          # via -r requirements-in.txt, fastapi
 pydantic-core==2.27.0     # via pydantic
 pyelftools==0.30          # via apkutils2
 pygments==2.17.2          # via frida-tools, rich
+pymupdf==1.27.2           # via onnxocr
 pyparsing==3.1.1          # via packaging
 pyreadline3==3.4.1        # via humanfriendly
 pywin32==306              # via -r requirements-in.txt
@@ -72,7 +79,7 @@ pyzmq==25.1.2             # via zerorpc
 requests==2.31.0          # via adbutils, onepush, ppocr-onnx, uiautomator2
 retry==0.9.2              # via adbutils, uiautomator2
 rich==13.3.5              # via -r requirements-in.txt
-shapely==2.0.2            # via ppocr-onnx
+shapely==2.0.2            # via onnxocr, ppocr-onnx
 six==1.16.0               # via adbutils, anytree, uiautomator2
 sniffio==1.3.0            # via anyio
 starlette==0.27.0         # via fastapi

diff --git a/tasks/SixRealms/oas_ocr.py b/tasks/SixRealms/oas_ocr.py
@@ -7,6 +7,7 @@
 from module.base.utils import color_similarity_2d, load_image
 # from module.ocr.ocr import Ocr
 from module.ocr.base_ocr import BaseCor
+from module.ocr.ppocr import TextSystem
 
 
 
@@ -33,6 +34,10 @@ def rotate_image(image):
         return image
 
     def detect_and_ocr(self, *args, **kwargs):
+        # The ONNX backend does not need this patch
+        if not isinstance(self.model, TextSystem):
+            return super().detect_and_ocr(*args, **kwargs)
+
         # Try hard to lower TextSystem.box_thresh
         backup = self.model.text_detector.box_thresh
         # Patch text_recognizer