Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions deploy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class ConfigModel:
StartOcrServer: bool = False
OcrServerPort: int = 22268
OcrClientAddress: str = "127.0.0.1:22268"
OcrEngine: str = "ppocr"

# Update
EnableReload: bool = True
Expand Down
4 changes: 4 additions & 0 deletions deploy/template
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ Deploy:
# Address of ocr server for alas instance to connect
# [Default] 127.0.0.1:22268
OcrClientAddress: 127.0.0.1:22268
# Specify the OCR engine/backend to be used for text recognition
# Supported options: ppocr, onnxocr
# [Default] ppocr
OcrEngine: ppocr

Update:
# Use auto update and builtin updater feature
Expand Down
12 changes: 9 additions & 3 deletions module/ocr/models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
from module.base.decorator import cached_property
from module.ocr.ppocr import TextSystem
from module.server.setting import State


class OcrModel:
    """Holder for the OCR backend selected by the deploy configuration."""

    @cached_property
    def ch(self):
        """Build the OCR model named by ``State.deploy_config.OcrEngine``.

        The backend modules are imported inside the branches so that only the
        selected engine is ever imported.

        :return: ``TextSystem()`` when the engine name contains ``ppocr``,
            ``ONNXPaddleOcr()`` when it contains ``onnxocr``, otherwise
            ``None`` (unknown engine name).
        """
        # Lower-case once; matching is a case-insensitive substring test.
        engine = State.deploy_config.OcrEngine.lower()
        if "ppocr" in engine:
            from module.ocr.ppocr import TextSystem
            return TextSystem()
        if "onnxocr" in engine:
            from module.ocr.oxocr import ONNXPaddleOcr
            return ONNXPaddleOcr()
        # NOTE(review): unknown engine names yield None, so callers must be
        # prepared for a missing model — confirm this is the intended fallback.
        return None


OCR_MODEL = OcrModel()
Expand All @@ -17,7 +23,7 @@ def ch(self):
import cv2
import time
from memory_profiler import profile
image = cv2.imread(r"E:\Project\OnmyojiAutoScript-assets\jade.png")
image = cv2.imread(r"D:\2025-10-08_01-32-05-371817.png")

# 引入ocr 会导致非常巨大的内存开销
@profile
Expand Down
129 changes: 129 additions & 0 deletions module/ocr/oxocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import base64
import pickle
from typing import List

import cv2
import numpy as np
from onnxocr import onnx_paddleocr


class BoxedResult(object):
    """A single OCR hit: text box, associated image, recognized text and score.

    Instances can be converted to and from a plain dictionary with
    :meth:`to_dict` / :meth:`from_dict`.
    """
    box: np.ndarray  # coordinates of the text box
    text_img: np.ndarray  # image associated with this result; may be None
    ocr_text: str  # recognized text
    score: float  # recognition score

    def __init__(self, box, text_img, ocr_text, score):
        """Store the four fields exactly as given."""
        self.box = box
        self.text_img = text_img
        self.ocr_text = ocr_text
        self.score = score

    def __str__(self):
        return 'BoxedResult[%s, %s]' % (self.ocr_text, self.score)

    def __repr__(self):
        return self.__str__()

    def to_dict(self):
        """Convert BoxedResult to a serializable dictionary.

        :return: dict with keys ``box`` (nested lists), ``text_img``
            (base64 text of the pickled image, or ``None``), ``ocr_text``
            and ``score``.
        """
        encoded_img = None
        if self.text_img is not None:
            encoded_img = base64.b64encode(
                pickle.dumps(self.text_img)).decode('utf-8')

        # numpy scalars (e.g. float32) are not JSON-serializable; unwrap them
        # to the matching builtin while leaving builtin values untouched.
        score = self.score
        if isinstance(score, np.generic):
            score = score.item()

        return {
            # asarray lets a plain list/tuple box serialize as well as an ndarray.
            'box': np.asarray(self.box).tolist(),
            'text_img': encoded_img,
            'ocr_text': self.ocr_text,
            'score': score
        }

    @classmethod
    def from_dict(cls, data):
        """Create BoxedResult from a dictionary produced by :meth:`to_dict`.

        :param data: mapping with keys ``box``, ``text_img``, ``ocr_text``
            and ``score``.
        :return: a new ``BoxedResult``; ``box`` is rebuilt as an ndarray.
        """
        text_img = None
        if data['text_img'] is not None:
            # SECURITY NOTE(review): pickle.loads can execute arbitrary code.
            # Only feed this method dictionaries from a trusted source; the
            # wire format is kept unchanged here for compatibility.
            text_img = pickle.loads(
                base64.b64decode(data['text_img'].encode('utf-8')))

        return cls(
            box=np.array(data['box']),
            text_img=text_img,
            ocr_text=data['ocr_text'],
            score=data['score']
        )


class ONNXPaddleOcr(onnx_paddleocr.ONNXPaddleOcr):
    """Project wrapper around ``onnxocr``'s ``ONNXPaddleOcr``.

    Adds helpers that grayscale the input before recognition and that return
    detection results as ``BoxedResult`` objects.
    """

    def __init__(self,
                 use_gpu=False,
                 gpu_mem=500,
                 gpu_id=0,
                 use_tensorrt=False,
                 precision="fp32",
                 drop_score=0.5,
                 use_angle_cls=True,
                 ):
        """Forward all options unchanged to the ``onnxocr`` base class."""
        super().__init__(
            use_gpu=use_gpu,
            gpu_mem=gpu_mem,
            gpu_id=gpu_id,
            use_tensorrt=use_tensorrt,
            precision=precision,
            drop_score=drop_score,
            use_angle_cls=use_angle_cls,
        )

    @staticmethod
    def _prepare_ocr_image(img: np.ndarray, use_grayscale: bool = True) -> np.ndarray:
        """Return a 3-channel image whose channels all hold the gray values.

        :param img: 2-D gray image, ``(H, W, 1)`` image, or a color image
            that ``cv2.COLOR_RGB2GRAY`` accepts.
        :param use_grayscale: when False the image is returned untouched.
        :return: image with gray content replicated over three channels.
        """
        if not use_grayscale:
            print("Using original image for single line OCR")
            return img
        if img.ndim == 2:
            return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        if img.ndim == 3 and img.shape[2] == 1:
            return cv2.cvtColor(img[:, :, 0], cv2.COLOR_GRAY2RGB)

        # Color input: drop the color information, then expand back to 3 channels.
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)

    def detect_and_ocr(self, img: np.ndarray, drop_score=None):
        """
        Detect text boxes and recognize text from the image.

        :param img: Input image in RGB format.
        :param drop_score: Minimum score to keep the recognized text. If None,
            uses the object's drop_score.
        :return: List of BoxedResult. Each result holds the box as an ndarray,
            the recognized text and its score; ``text_img`` is the whole
            prepared (grayscaled) image, not a per-box crop.
        """
        img = self._prepare_ocr_image(img)
        rec_res = self.ocr(img, det=True, rec=True, cls=True)
        if not rec_res:
            return []

        # Only the first entry of the result list is consumed. Guard against
        # it being None or empty (nothing detected) so the loop below cannot
        # fail with a TypeError.
        first = rec_res[0]
        if first is None or len(first) == 0:
            return []

        if drop_score is None:
            drop_score = self.drop_score

        res = []
        for box, rec_result in first:
            text, score = rec_result
            if score < drop_score:
                continue
            # NOTE(review): every result references the same full image; a
            # per-box crop may have been intended — confirm against callers.
            res.append(BoxedResult(np.asarray(box), img, text, score))
        return res

    def ocr_lines(self, img_list: List[np.ndarray]):
        """Run the text recognizer (no detection) on a batch of line images.

        Each image is grayscaled, and images whose height is at least 1.5x
        their width are rotated by 90 degrees with ``np.rot90`` first.

        :param img_list: images, each containing one line of text.
        :return: whatever ``self.text_recognizer`` returns for the batch.
        """
        tmp_img_list = []
        for img in img_list:
            img = self._prepare_ocr_image(img)
            img_height, img_width = img.shape[0:2]
            if img_height * 1.0 / img_width >= 1.5:
                img = np.rot90(img)
            tmp_img_list.append(img)

        rec_res = self.text_recognizer(tmp_img_list)
        return rec_res

    def ocr_single_line(self, img):
        """Recognize a single line image.

        :return: first entry of :meth:`ocr_lines` for ``[img]``, or ``None``
            when the recognizer returned nothing.
        """
        res = self.ocr_lines([img])
        if res:
            return res[0]
        return None
1 change: 1 addition & 0 deletions requirements-in.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pywin32==306
# OCR
ppocr-onnx==0.0.3.9
cn2an==0.5.23
onnxocr==2025.5

# Web
paho-mqtt==1.6.1
Expand Down
17 changes: 12 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#
# pip-compile --annotation-style=line --output-file=requirements.txt requirements-in.txt
#
--index-url https://pypi.tuna.tsinghua.edu.cn/simple

adbutils==0.11.0 # via -r requirements-in.txt, uiautomator2
annotated-types==0.7.0 # via pydantic
anyio==3.7.1 # via fastapi, starlette
Expand Down Expand Up @@ -41,29 +43,34 @@ markdown-it-py==2.2.0 # via rich
mdurl==0.1.2 # via markdown-it-py
mpmath==1.3.0 # via sympy
msgpack==1.0.7 # via zerorpc
numpy==1.24.3 # via -r requirements-in.txt, onnxruntime, opencv-python, ppocr-onnx, shapely
numpy==1.24.3 # via -r requirements-in.txt, onnxocr, onnxruntime, opencv-contrib-python, opencv-python, opencv-python-headless, ppocr-onnx, shapely
oas-checkin-biggod==0.0.1 # via -r requirements-in.txt
oashya==0.0.7 # via -r requirements-in.txt
onepush==1.3.0 # via -r requirements-in.txt
onnxruntime==1.16.3 # via ppocr-onnx
onnxocr==2025.5 # via -r requirements-in.txt
onnxruntime==1.16.3 # via onnxocr, ppocr-onnx
opencv-contrib-python==4.11.0.86 # via onnxocr
opencv-python==4.7.0.72 # via -r requirements-in.txt, ppocr-onnx
opencv-python-headless==4.11.0.86 # via onnxocr
packaging==20.9 # via deprecation, onnxruntime, uiautomator2
paho-mqtt==1.6.1 # via -r requirements-in.txt
pillow==10.2.0 # via ppocr-onnx, uiautomator2
pdf2image==1.17.0 # via onnxocr
pillow==10.2.0 # via pdf2image, ppocr-onnx, uiautomator2
ppocr-onnx==0.0.3.9 # via -r requirements-in.txt
proces==0.1.7 # via cn2an
progress==1.6 # via uiautomator2
prompt-toolkit==3.0.52 # via frida-tools
protobuf==4.25.1 # via onnxruntime
psutil==6.1.1 # via -r requirements-in.txt
py==1.11.0 # via retry
pyclipper==1.3.0.post5 # via ppocr-onnx
pyclipper==1.3.0.post5 # via onnxocr, ppocr-onnx
pycparser==2.21 # via cffi
pycryptodome==3.21.0 # via onepush
pydantic==2.10.0 # via -r requirements-in.txt, fastapi
pydantic-core==2.27.0 # via pydantic
pyelftools==0.30 # via apkutils2
pygments==2.17.2 # via frida-tools, rich
pymupdf==1.27.2 # via onnxocr
pyparsing==3.1.1 # via packaging
pyreadline3==3.4.1 # via humanfriendly
pywin32==306 # via -r requirements-in.txt
Expand All @@ -72,7 +79,7 @@ pyzmq==25.1.2 # via zerorpc
requests==2.31.0 # via adbutils, onepush, ppocr-onnx, uiautomator2
retry==0.9.2 # via adbutils, uiautomator2
rich==13.3.5 # via -r requirements-in.txt
shapely==2.0.2 # via ppocr-onnx
shapely==2.0.2 # via onnxocr, ppocr-onnx
six==1.16.0 # via adbutils, anytree, uiautomator2
sniffio==1.3.0 # via anyio
starlette==0.27.0 # via fastapi
Expand Down
5 changes: 5 additions & 0 deletions tasks/SixRealms/oas_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from module.base.utils import color_similarity_2d, load_image
# from module.ocr.ocr import Ocr
from module.ocr.base_ocr import BaseCor
from module.ocr.ppocr import TextSystem



Expand All @@ -33,6 +34,10 @@ def rotate_image(image):
return image

def detect_and_ocr(self, *args, **kwargs):
# onnx不需要此补丁
if not isinstance(self.model, TextSystem):
return super().detect_and_ocr(*args, **kwargs)

# Try hard to lower TextSystem.box_thresh
backup = self.model.text_detector.box_thresh
# Patch text_recognizer
Expand Down