Skip to content

Commit 154025f

Browse files
committed
refactor: 优化解析器日志与API检查逻辑,简化异常处理
1 parent: 587d1b2 · commit: 154025f

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

docreader/parser/chain_parser.py

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from docreader.utils import endecode
77

88
logger = logging.getLogger(__name__)
9+
logger.setLevel(logging.INFO)
910

1011

1112
class FirstParser(BaseParser):
@@ -16,16 +17,15 @@ def __init__(self, *args, **kwargs):
1617

1718
self._parsers: List[BaseParser] = []
1819
for parser_cls in self._parser_cls:
19-
try:
20-
parser = parser_cls(*args, **kwargs)
21-
self._parsers.append(parser)
22-
except Exception as e:
23-
logger.error(f"Failed to create parser {parser_cls.__name__}: {e}")
20+
parser = parser_cls(*args, **kwargs)
21+
self._parsers.append(parser)
2422

2523
def parse_into_text(self, content: bytes) -> Document:
2624
for p in self._parsers:
25+
logger.info(f"FirstParser: using parser {p.__class__.__name__}")
2726
document = p.parse_into_text(content)
2827
if document.is_valid():
28+
logger.info(f"FirstParser: parser {p.__class__.__name__} succeeded")
2929
return document
3030
return Document()
3131

@@ -43,16 +43,14 @@ def __init__(self, *args, **kwargs):
4343

4444
self._parsers: List[BaseParser] = []
4545
for parser_cls in self._parser_cls:
46-
try:
47-
parser = parser_cls(*args, **kwargs)
48-
self._parsers.append(parser)
49-
except Exception as e:
50-
logger.error(f"Failed to create parser {parser_cls.__name__}: {e}")
46+
parser = parser_cls(*args, **kwargs)
47+
self._parsers.append(parser)
5148

5249
def parse_into_text(self, content: bytes) -> Document:
5350
images: Dict[str, str] = {}
5451
document = Document()
5552
for p in self._parsers:
53+
logger.info(f"PipelineParser: using parser {p.__class__.__name__}")
5654
document = p.parse_into_text(content)
5755
content = endecode.encode_bytes(document.content)
5856
images.update(document.images)

docreader/parser/mineru_parser.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ def __init__(
2828
self.image_helper = MarkdownImageUtil()
2929
self.base64_pattern = re.compile(r"data:image/(\w+);base64,(.*)")
3030
self.enable = self.ping()
31-
assert self.ping(), "MinerU API is not reachable"
3231

3332
def ping(self, timeout: int = 5) -> bool:
3433
try:
@@ -41,6 +40,10 @@ def ping(self, timeout: int = 5) -> bool:
4140
return False
4241

4342
def parse_into_text(self, content: bytes) -> Document:
43+
if not self.enable:
44+
logger.debug("MinerU API is not enabled")
45+
return Document()
46+
4447
logger.info(f"Parsing scanned PDF via MinerU API (size: {len(content)} bytes)")
4548
md_content: str = ""
4649
images_b64: Dict[str, str] = {}

0 commit comments

Comments
 (0)