diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4c646c0..b38922d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [0.1.5] - 2026-04-03
+
+### Added
+- `src/docproc/vision.py` — Async Vision LLM extraction via DeepFellow OpenAI-compatible API
+- PDF-to-image conversion using PyMuPDF (zero system dependencies)
+- Base64 image encoding and per-page Vision API calls
+- Retry logic with exponential backoff (3 attempts, 1s initial delay, 2x factor)
+- `pymupdf` dependency for local PDF rendering
+- Test suite for Vision module (~27 tests)
+
 ## [0.1.4] - 2026-02-27
 
 ### Added
diff --git a/pyproject.toml b/pyproject.toml
index 908aff8..f97501f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docproc"
-version = "0.1.4"
+version = "0.1.5"
 requires-python = ">=3.14"
 dependencies = [
     "watchdog>=4.0.0",
@@ -10,6 +10,7 @@ dependencies = [
     "gradio>=4.0.0",
     "python-dotenv>=1.0.0",
     "httpx>=0.28.0",
+    "pymupdf>=1.25.0",
 ]
 
 [build-system]
diff --git a/src/docproc/__init__.py b/src/docproc/__init__.py
index bbab024..1276d02 100644
--- a/src/docproc/__init__.py
+++ b/src/docproc/__init__.py
@@ -1 +1 @@
-__version__ = "0.1.4"
+__version__ = "0.1.5"
diff --git a/src/docproc/vision.py b/src/docproc/vision.py
new file mode 100644
index 0000000..0e58266
--- /dev/null
+++ b/src/docproc/vision.py
@@ -0,0 +1,248 @@
+"""Vision LLM extraction via DeepFellow OpenAI-compatible API.
+
+Converts PDF pages to images using PyMuPDF, sends base64-encoded images
+to a vision model via chat completions, and returns structured markdown.
+Runs async for parallel execution with OCR extraction.
+"""
+
+import asyncio
+import base64
+import logging
+from pathlib import Path
+
+import pymupdf
+from openai import APIConnectionError, APIStatusError, AsyncOpenAI
+
+from docproc.config import Config
+from docproc.models import VisionResult
+
+logger = logging.getLogger(__name__)
+
+SUPPORTED_EXTENSIONS = frozenset({".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"})
+_PDF_EXTENSION = ".pdf"
+_IMAGE_DPI = 150
+_MAX_RETRIES = 3
+_INITIAL_DELAY = 1.0
+_BACKOFF_FACTOR = 2.0
+_MAX_TOKENS = 4096
+# Rendered PDF pages are always PNG; also the fallback for unknown suffixes.
+_DEFAULT_MIME_TYPE = "image/png"
+# MIME type announced in the data URL for image files that are sent as-is.
+_IMAGE_MIME_TYPES = {
+    ".png": "image/png",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".tiff": "image/tiff",
+    ".tif": "image/tiff",
+}
+# Transient 4xx responses worth retrying (timeout, conflict, rate limit).
+_RETRYABLE_CLIENT_STATUS = frozenset({408, 409, 429})
+
+_EXTRACTION_PROMPT = """\
+Extract all text content from this document image.
+Preserve the structure including headers, paragraphs, and lists.
+Format tables as markdown tables.
+Note any structural elements (letterhead, signatures, stamps).
+Output everything in clean markdown format.\
+"""
+
+
+class VisionError(Exception):
+    """Raised when Vision extraction fails."""
+
+
+def _validate_file(file_path: Path) -> None:
+    """Raise VisionError unless file_path is an existing, supported file."""
+    if not file_path.is_file():
+        msg = f"File not found or not a regular file: {file_path}"
+        raise VisionError(msg)
+    ext = file_path.suffix.lower()
+    if ext not in SUPPORTED_EXTENSIONS:
+        msg = f"Unsupported file type: {ext}"
+        raise VisionError(msg)
+
+
+def _pdf_to_images(file_path: Path) -> list[bytes]:
+    """Render each PDF page as PNG bytes using PyMuPDF."""
+    try:
+        doc = pymupdf.open(file_path)
+    except Exception as exc:
+        msg = f"Failed to open PDF: {file_path}: {exc}"
+        raise VisionError(msg) from exc
+    try:
+        images = []
+        for page in doc:
+            pix = page.get_pixmap(dpi=_IMAGE_DPI)
+            images.append(pix.tobytes("png"))
+        return images
+    except Exception as exc:
+        msg = f"Failed to render PDF pages: {file_path}: {exc}"
+        raise VisionError(msg) from exc
+    finally:
+        doc.close()
+
+
+def _encode_image(image_bytes: bytes) -> str:
+    """Return image_bytes as an ASCII base64 string."""
+    return base64.b64encode(image_bytes).decode("utf-8")
+
+
+async def _call_vision_api(
+    client: AsyncOpenAI,
+    model: str,
+    encoded_image: str,
+    mime_type: str = _DEFAULT_MIME_TYPE,
+) -> str:
+    """Send a single image to the vision model with retry on failures.
+
+    Server errors (5xx), transient client errors (408/409/429) and
+    connection errors are retried up to _MAX_RETRIES times with
+    exponential backoff; any other HTTP error fails immediately.
+
+    Args:
+        client: OpenAI-compatible async client.
+        model: Name of the vision model to call.
+        encoded_image: Base64-encoded image payload.
+        mime_type: MIME type of the payload, used in the data URL.
+
+    Returns:
+        The model's text answer, or "" when it returned no content.
+
+    Raises:
+        VisionError: On a non-retryable error, an empty choices list,
+            or when every attempt failed.
+    """
+    delay = _INITIAL_DELAY
+    last_error: Exception | None = None
+
+    for attempt in range(1, _MAX_RETRIES + 1):
+        try:
+            response = await client.chat.completions.create(
+                model=model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": _EXTRACTION_PROMPT},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{mime_type};base64,{encoded_image}"
+                                },
+                            },
+                        ],
+                    }
+                ],
+                max_tokens=_MAX_TOKENS,
+            )
+            if not response.choices:
+                msg = "Vision API returned empty choices"
+                raise VisionError(msg)
+            content = response.choices[0].message.content
+            if not content:
+                logger.warning(
+                    "Vision API returned empty content on attempt %d",
+                    attempt,
+                )
+            return content or ""
+
+        except APIStatusError as exc:
+            retryable = (
+                exc.status_code >= 500
+                or exc.status_code in _RETRYABLE_CLIENT_STATUS
+            )
+            if retryable:
+                last_error = exc
+                logger.warning(
+                    "Vision attempt %d/%d failed (HTTP %d): %s",
+                    attempt,
+                    _MAX_RETRIES,
+                    exc.status_code,
+                    str(exc)[:200],
+                )
+                if attempt < _MAX_RETRIES:
+                    await asyncio.sleep(delay)
+                    delay *= _BACKOFF_FACTOR
+                continue
+            msg = f"Client error {exc.status_code}: {exc}"
+            raise VisionError(msg) from exc
+
+        except APIConnectionError as exc:
+            last_error = exc
+            logger.warning(
+                "Vision attempt %d/%d failed with connection error: %s",
+                attempt,
+                _MAX_RETRIES,
+                exc,
+            )
+            if attempt < _MAX_RETRIES:
+                await asyncio.sleep(delay)
+                delay *= _BACKOFF_FACTOR
+
+    msg = f"Vision extraction failed after {_MAX_RETRIES} attempts"
+    raise VisionError(msg) from last_error
+
+
+async def extract_with_vision(file_path: Path, config: Config) -> VisionResult:
+    """Extract content from a document using a Vision LLM model.
+
+    Converts PDF pages to images, sends each to the vision model,
+    and returns structured markdown content.
+
+    Args:
+        file_path: Path to PDF or image file.
+        config: Application configuration.
+
+    Returns:
+        VisionResult with markdown content.
+
+    Raises:
+        VisionError: If extraction fails after retries.
+    """
+    _validate_file(file_path)
+
+    ext = file_path.suffix.lower()
+    if ext == _PDF_EXTENSION:
+        image_pages = _pdf_to_images(file_path)
+        mime_type = _DEFAULT_MIME_TYPE
+    else:
+        try:
+            image_pages = [file_path.read_bytes()]
+        except OSError as exc:
+            msg = f"Failed to read file {file_path}: {exc}"
+            raise VisionError(msg) from exc
+        # Image files are sent as-is, so label them with their real format.
+        mime_type = _IMAGE_MIME_TYPES.get(ext, _DEFAULT_MIME_TYPE)
+
+    logger.info(
+        "Starting Vision extraction: %s (%d pages)",
+        file_path.name,
+        len(image_pages),
+    )
+
+    # max_retries=0: _call_vision_api owns the retry policy; the SDK's own
+    # default retries would otherwise multiply the attempts and delays.
+    client = AsyncOpenAI(
+        base_url=config.deepfellow.base_url,
+        api_key=config.deepfellow.api_key,
+        max_retries=0,
+    )
+
+    page_results = []
+    try:
+        for i, page_bytes in enumerate(image_pages, 1):
+            text = await _call_vision_api(
+                client,
+                config.deepfellow.vision_model,
+                _encode_image(page_bytes),
+                mime_type,
+            )
+            page_results.append(text)
+            logger.debug("Vision page %d/%d complete", i, len(image_pages))
+    finally:
+        # Release the client's HTTP connection pool even when a page fails.
+        await client.close()
+
+    content = "\n\n".join(page_results)
+    logger.info("Vision complete: %s (%d pages)", file_path.name, len(image_pages))
+    return VisionResult(content=content)
diff --git a/tests/test_init.py b/tests/test_init.py
index d5bbd46..639fd7e 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -2,4 +2,4 @@
 
 
 def test_version_matches_expected():
-    assert __version__ == "0.1.4"
+    assert __version__ == "0.1.5"
diff --git a/tests/test_vision.py b/tests/test_vision.py
new file mode 100644
index 0000000..06a4afc
--- /dev/null
+++ b/tests/test_vision.py
@@ -0,0 +1,376 @@
+"""Tests for the Vision LLM extraction module."""
+
+from unittest import mock
+
+import pytest
+from openai import APIConnectionError, APIStatusError
+
+from docproc.config import Config
+from docproc.vision import (
+    VisionError,
+    _call_vision_api,
+    _encode_image,
+    _pdf_to_images,
+    _validate_file,
+    extract_with_vision,
+)
+
+
+def _make_config(**overrides: str) -> Config:
+    """Build a minimal frozen Config for testing."""
+    deepfellow = {
+        "base_url": "http://localhost:8000",
+        "responses_endpoint": "/v1/responses",
+        "ocr_endpoint": "/v1/ocr",
+        "api_key": "test-key",
+        "vision_model": "llama3.2-vision:11b",
+        "llm_model": "deepseek",
+        "rag_collection": "documents",
+        **overrides,
+    }
+    return Config(
+        directories={"watch": "/tmp/inbox", "output": "/tmp/output"},
+        deepfellow=deepfellow,
+        recipients=[{"name": "Test", "tags": ["t1"]}],
+    )
+
+
+def _mock_completion(content: str | None = "Extracted text"):
+    """Build a mock ChatCompletion response."""
+    message = mock.Mock()
+    message.content = content
+    choice = mock.Mock()
+    choice.message = message
+    response = mock.Mock()
+    response.choices = [choice]
+    return response
+
+
+def _make_api_status_error(status_code: int, message: str = "error"):
+    """Build a mock APIStatusError."""
+    response = mock.Mock()
+    response.status_code = status_code
+    response.headers = {}
+    return APIStatusError(
+        message=message,
+        response=response,
+        body=None,
+    )
+
+
+# --- _validate_file ---
+
+
+@pytest.mark.parametrize("ext", [".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"])
+def test_validate_file_accepts_supported_extensions(tmp_path, ext):
+    f = tmp_path / f"doc{ext}"
+    f.touch()
+    _validate_file(f)  # should not raise
+
+
+def test_validate_file_rejects_unsupported_extension(tmp_path):
+    f = tmp_path / "doc.docx"
+    f.touch()
+    with pytest.raises(VisionError, match="Unsupported file type"):
+        _validate_file(f)
+
+
+def test_validate_file_rejects_missing_file(tmp_path):
+    f = tmp_path / "missing.pdf"
+    with pytest.raises(VisionError, match="File not found"):
+        _validate_file(f)
+
+
+@pytest.mark.parametrize("ext", [".PDF", ".Png", ".JPG"])
+def test_validate_file_is_case_insensitive(tmp_path, ext):
+    f = tmp_path / f"doc{ext}"
+    f.touch()
+    _validate_file(f)  # should not raise
+
+
+# --- _pdf_to_images ---
+
+
+@mock.patch("docproc.vision.pymupdf")
+def test_pdf_to_images_renders_pages(mock_pymupdf, tmp_path):
+    pdf = tmp_path / "doc.pdf"
+    pdf.write_bytes(b"%PDF-fake")
+
+    mock_pix = mock.Mock()
+    mock_pix.tobytes.return_value = b"png-bytes"
+    mock_page = mock.Mock()
+    mock_page.get_pixmap.return_value = mock_pix
+
+    mock_doc = mock.MagicMock()  # MagicMock: dunder __iter__ is configurable
+    mock_doc.__iter__ = mock.Mock(return_value=iter([mock_page, mock_page]))
+    mock_pymupdf.open.return_value = mock_doc
+
+    images = _pdf_to_images(pdf)
+
+    assert len(images) == 2
+    assert images[0] == b"png-bytes"
+    assert mock_page.get_pixmap.call_args.kwargs["dpi"] == 150
+    assert mock_doc.close.call_count == 1  # document closed after rendering
+
+
+@mock.patch("docproc.vision.pymupdf")
+def test_pdf_to_images_raises_on_corrupt_file(mock_pymupdf, tmp_path):
+    pdf = tmp_path / "corrupt.pdf"
+    pdf.write_bytes(b"not-a-pdf")
+    mock_pymupdf.open.side_effect = RuntimeError("corrupt")
+
+    with pytest.raises(VisionError, match="Failed to open PDF"):
+        _pdf_to_images(pdf)
+
+
+# --- _encode_image ---
+
+
+def test_encode_image_returns_base64():
+    result = _encode_image(b"hello")
+    assert result == "aGVsbG8="
+
+
+# --- _call_vision_api ---
+
+
+async def test_call_vision_api_returns_content():
+    client = mock.AsyncMock()
+    client.chat.completions.create.return_value = _mock_completion("# Title\nContent")
+
+    result = await _call_vision_api(client, "vision-model", "base64data")
+
+    assert result == "# Title\nContent"
+    assert client.chat.completions.create.call_count == 1
+
+
+async def test_call_vision_api_raises_on_empty_choices():
+    client = mock.AsyncMock()
+    response = mock.Mock()
+    response.choices = []
+    client.chat.completions.create.return_value = response
+
+    with pytest.raises(VisionError, match="empty choices"):
+        await _call_vision_api(client, "model", "img")
+
+
+async def test_call_vision_api_returns_empty_string_on_none_content():
+    client = mock.AsyncMock()
+    client.chat.completions.create.return_value = _mock_completion(None)
+
+    result = await _call_vision_api(client, "model", "img")
+
+    assert result == ""
+
+
+async def test_call_vision_api_retries_on_5xx():
+    client = mock.AsyncMock()
+    client.chat.completions.create.side_effect = [
+        _make_api_status_error(503, "Service Unavailable"),
+        _mock_completion("Recovered"),
+    ]
+
+    with mock.patch("docproc.vision.asyncio.sleep", new_callable=mock.AsyncMock):
+        result = await _call_vision_api(client, "model", "img")
+
+    assert result == "Recovered"
+    assert client.chat.completions.create.call_count == 2
+
+
+async def test_call_vision_api_fails_immediately_on_4xx():
+    client = mock.AsyncMock()
+    client.chat.completions.create.side_effect = _make_api_status_error(
+        422, "Unprocessable"
+    )
+
+    with pytest.raises(VisionError, match="Client error 422"):
+        await _call_vision_api(client, "model", "img")
+
+    assert client.chat.completions.create.call_count == 1  # 422 is not retried
+
+
+async def test_call_vision_api_raises_after_max_retries():
+    client = mock.AsyncMock()
+    client.chat.completions.create.side_effect = _make_api_status_error(
+        500, "Internal Server Error"
+    )
+
+    with (
+        mock.patch("docproc.vision.asyncio.sleep", new_callable=mock.AsyncMock),
+        pytest.raises(VisionError, match="failed after 3 attempts"),
+    ):
+        await _call_vision_api(client, "model", "img")
+
+    assert client.chat.completions.create.call_count == 3  # _MAX_RETRIES attempts
+
+
+async def test_call_vision_api_retries_on_connection_error():
+    client = mock.AsyncMock()
+    client.chat.completions.create.side_effect = [
+        APIConnectionError(request=mock.Mock()),
+        _mock_completion("OK"),
+    ]
+
+    with mock.patch("docproc.vision.asyncio.sleep", new_callable=mock.AsyncMock):
+        result = await _call_vision_api(client, "model", "img")
+
+    assert result == "OK"
+    assert client.chat.completions.create.call_count == 2
+
+
+async def test_call_vision_api_raises_after_max_connection_errors():
+    client = mock.AsyncMock()
+    client.chat.completions.create.side_effect = APIConnectionError(request=mock.Mock())
+
+    with (
+        mock.patch("docproc.vision.asyncio.sleep", new_callable=mock.AsyncMock),
+        pytest.raises(VisionError, match="failed after 3 attempts"),
+    ):
+        await _call_vision_api(client, "model", "img")
+
+    assert client.chat.completions.create.call_count == 3  # _MAX_RETRIES attempts
+
+
+async def test_call_vision_api_exponential_backoff():
+    client = mock.AsyncMock()
+    client.chat.completions.create.side_effect = _make_api_status_error(500, "error")
+
+    mock_sleep = mock.AsyncMock()
+    with (
+        mock.patch("docproc.vision.asyncio.sleep", mock_sleep),
+        pytest.raises(VisionError),
+    ):
+        await _call_vision_api(client, "model", "img")
+
+    assert mock_sleep.call_count == 2  # no sleep after the final attempt
+    assert mock_sleep.call_args_list[0].args[0] == 1.0  # _INITIAL_DELAY
+    assert mock_sleep.call_args_list[1].args[0] == 2.0  # x _BACKOFF_FACTOR
+
+
+# --- extract_with_vision (integration, mocked) ---
+
+
+@mock.patch("docproc.vision.AsyncOpenAI")
+@mock.patch("docproc.vision._pdf_to_images")
+async def test_extract_with_vision_single_page_pdf(mock_pdf, mock_openai_cls, tmp_path):
+    pdf = tmp_path / "doc.pdf"
+    pdf.write_bytes(b"%PDF-fake")
+    mock_pdf.return_value = [b"png-page-1"]
+
+    mock_client = mock.AsyncMock()
+    mock_client.chat.completions.create.return_value = _mock_completion("# Page 1")
+    mock_openai_cls.return_value = mock_client
+
+    result = await extract_with_vision(pdf, _make_config())
+
+    assert result.content == "# Page 1"
+    assert result.tables is None
+    assert result.structural_notes is None
+
+
+@mock.patch("docproc.vision.AsyncOpenAI")
+@mock.patch("docproc.vision._pdf_to_images")
+async def test_extract_with_vision_multi_page_pdf(mock_pdf, mock_openai_cls, tmp_path):
+    pdf = tmp_path / "doc.pdf"
+    pdf.write_bytes(b"%PDF-fake")
+    mock_pdf.return_value = [b"png-1", b"png-2", b"png-3"]
+
+    mock_client = mock.AsyncMock()
+    mock_client.chat.completions.create.side_effect = [
+        _mock_completion("Page 1 text"),
+        _mock_completion("Page 2 text"),
+        _mock_completion("Page 3 text"),
+    ]
+    mock_openai_cls.return_value = mock_client
+
+    result = await extract_with_vision(pdf, _make_config())
+
+    assert result.content == "Page 1 text\n\nPage 2 text\n\nPage 3 text"
+    assert mock_client.chat.completions.create.call_count == 3  # one call per page
+
+
+@mock.patch("docproc.vision.AsyncOpenAI")
+async def test_extract_with_vision_direct_image(mock_openai_cls, tmp_path):
+    img = tmp_path / "photo.png"
+    img.write_bytes(b"fake-png-data")
+
+    mock_client = mock.AsyncMock()
+    mock_client.chat.completions.create.return_value = _mock_completion("Image content")
+    mock_openai_cls.return_value = mock_client
+
+    result = await extract_with_vision(img, _make_config())
+
+    assert result.content == "Image content"
+
+
+async def test_extract_with_vision_rejects_unsupported_file(tmp_path):
+    docx = tmp_path / "doc.docx"
+    docx.touch()
+
+    with pytest.raises(VisionError, match="Unsupported file type"):
+        await extract_with_vision(docx, _make_config())
+
+
+async def test_extract_with_vision_rejects_missing_file(tmp_path):
+    missing = tmp_path / "missing.pdf"
+
+    with pytest.raises(VisionError, match="File not found"):
+        await extract_with_vision(missing, _make_config())
+
+
+@mock.patch("docproc.vision.AsyncOpenAI")
+async def test_extract_with_vision_uses_config_model(mock_openai_cls, tmp_path):
+    img = tmp_path / "doc.jpg"
+    img.write_bytes(b"fake-jpg")
+
+    mock_client = mock.AsyncMock()
+    mock_client.chat.completions.create.return_value = _mock_completion("text")
+    mock_openai_cls.return_value = mock_client
+
+    await extract_with_vision(img, _make_config(vision_model="my-vision-model"))
+
+    call_kwargs = mock_client.chat.completions.create.call_args.kwargs
+    assert call_kwargs["model"] == "my-vision-model"
+
+
+@mock.patch("docproc.vision.AsyncOpenAI")
+async def test_extract_with_vision_uses_config_base_url(mock_openai_cls, tmp_path):
+    img = tmp_path / "doc.jpg"
+    img.write_bytes(b"fake-jpg")
+
+    mock_client = mock.AsyncMock()
+    mock_client.chat.completions.create.return_value = _mock_completion("text")
+    mock_openai_cls.return_value = mock_client
+
+    await extract_with_vision(img, _make_config(base_url="http://custom:9000"))
+
+    assert mock_openai_cls.call_args.kwargs["base_url"] == "http://custom:9000"
+
+
+async def test_extract_with_vision_raises_on_image_read_failure(tmp_path):
+    img = tmp_path / "unreadable.png"
+    img.write_bytes(b"data")
+
+    # Simulate the failed read instead of chmod(0o000): permission bits are
+    # not enforced for root (common in CI containers) or on Windows.
+    denied = mock.patch("docproc.vision.Path.read_bytes", side_effect=PermissionError)
+    with denied, pytest.raises(VisionError, match="Failed to read file"):
+        await extract_with_vision(img, _make_config())
+
+
+@mock.patch("docproc.vision.pymupdf")
+def test_pdf_to_images_raises_on_render_failure(mock_pymupdf, tmp_path):
+    pdf = tmp_path / "doc.pdf"
+    pdf.write_bytes(b"%PDF-fake")
+
+    mock_page = mock.Mock()
+    mock_page.get_pixmap.side_effect = RuntimeError("render failed")
+
+    mock_doc = mock.MagicMock()
+    mock_doc.__iter__ = mock.Mock(return_value=iter([mock_page]))
+    mock_pymupdf.open.return_value = mock_doc
+
+    with pytest.raises(VisionError, match="Failed to render PDF pages"):
+        _pdf_to_images(pdf)
+
+    assert mock_doc.close.call_count == 1
diff --git a/uv.lock b/uv.lock
index aef2ec4..6eebfb4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -184,13 +184,14 @@ wheels = [
 
 [[package]]
 name = "docproc"
-version = "0.1.4"
+version = "0.1.5"
 source = { editable = "." }
 dependencies = [
     { name = "gradio" },
     { name = "httpx" },
     { name = "openai" },
     { name = "pydantic" },
+    { name = "pymupdf" },
     { name = "python-dotenv" },
     { name = "pyyaml" },
     { name = "watchdog" },
@@ -211,6 +212,7 @@ requires-dist = [
     { name = "httpx", specifier = ">=0.28.0" },
     { name = "openai", specifier = ">=1.0.0" },
     { name = "pydantic", specifier = ">=2.0.0" },
+    { name = "pymupdf", specifier = ">=1.25.0" },
     { name = "python-dotenv", specifier = ">=1.0.0" },
     { name = "pyyaml", specifier = ">=6.0" },
     { name = "watchdog", specifier = ">=4.0.0" },
@@ -751,6 +753,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
 ]
 
+[[package]]
+name = "pymupdf"
+version = "1.27.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/32/f6b645c51d79a188a4844140c5dabca7b487ad56c4be69c4bc782d0d11a9/pymupdf-1.27.2.2.tar.gz", hash = "sha256:ea8fdc3ab6671ca98f629d5ec3032d662c8cf1796b146996b7ad306ac7ed3335", size = 85354380, upload-time = "2026-03-20T09:47:58.386Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/90/88/d01992a50165e22dec057a1129826846c547feb4ba07f42720ac030ce438/pymupdf-1.27.2.2-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:800f43e60a6f01f644343c2213b8613db02eaf4f4ba235b417b3351fa99e01c0", size = 23987563, upload-time = "2026-03-19T12:35:42.989Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/0e/9f526bc1d49d8082eff0d1547a69d541a0c5a052e71da625559efaba46a6/pymupdf-1.27.2.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:8e2e4299ef1ac0c9dff9be096cbd22783699673abecfa7c3f73173ae06421d73", size = 23263089, upload-time = "2026-03-20T09:44:16.982Z" },
+    { url = "https://files.pythonhosted.org/packages/42/be/984f0d6343935b5dd30afaed6be04fc753146bf55709e63ef28bf9ef7497/pymupdf-1.27.2.2-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5e3d54922db1c7da844f1208ac1db05704770988752311f81dd36694ae0a07b", size = 24318817, upload-time = "2026-03-20T09:44:33.209Z" },
+    { url = "https://files.pythonhosted.org/packages/22/8e/85e9d9f11dbf34036eb1df283805ef6b885f2005a56d6533bb58ab0b8a11/pymupdf-1.27.2.2-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:892698c9768457eb0991c102c96a856c0a7062539371df5e6bee0816f3ef498e", size = 24948135, upload-time = "2026-03-20T09:44:51.012Z" },
+    { url = "https://files.pythonhosted.org/packages/db/e6/386edb017e5b93f1ab0bf6653ae32f3dd8dfc834ed770212e10ca62f4af9/pymupdf-1.27.2.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b4bbfa6ef347fade678771a93f6364971c51a2cdc44cd2400dc4eeed1ddb4e6", size = 25169585, upload-time = "2026-03-20T09:45:05.393Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/fd/f1ebe24fcd31aaea8b85b3a7ac4c3fc96e20388be5466ace27c9a3c546d9/pymupdf-1.27.2.2-cp310-abi3-win32.whl", hash = "sha256:0b8e924433b7e0bd46be820899300259235997d5a747638471fb2762baa8ee30", size = 18008861, upload-time = "2026-03-20T09:45:21.353Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/b6/2a9a8556000199bbf80a5915dcd15d550d1e5288894316445c54726aaf53/pymupdf-1.27.2.2-cp310-abi3-win_amd64.whl", hash = "sha256:09bb53f9486ccb5297030cbc2dbdae845ba1c3c5126e96eb2d16c4f118de0b5b", size = 19238032, upload-time = "2026-03-20T09:45:37.941Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/c6/e3e11c42f09b9c34ec332c0f37b817671b59ef4001895b854f0494092105/pymupdf-1.27.2.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6cebfbbdfd219ebdebf4d8e3914624b2e3d3a844c43f4f76935822dd9b13cc12", size = 24985299, upload-time = "2026-03-20T09:45:53.26Z" },
+]
+
 [[package]]
 name = "pytest"
 version = "9.0.2"