Skip to content
This repository was archived by the owner on Dec 31, 2023. It is now read-only.

Commit a58e216

Browse files
Better hack for supporting user using OCRmyPDF inside WSL (#2)
1 parent 139635c commit a58e216

File tree

6 files changed

+33
-12
lines changed

6 files changed

+33
-12
lines changed

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,4 +40,7 @@ COPY api/ /app/api
4040

4141
EXPOSE 8000
4242

43+
ENV WORKDIR /workdir
44+
VOLUME /workdir
45+
4346
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"]

api/main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def clean_docs():
6666

6767
def do_ocr(_doc: Document):
6868
pool_ocr.acquire()
69-
_doc.ocr()
69+
_doc.ocr(config.enable_wsl_compat)
7070
pool_ocr.release()
7171

7272

api/models.py

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from pydantic import BaseModel
99

1010
from api.settings import config
11+
from api.tools import special_win_wslpath
1112

1213

1314
class Lang(str, Enum):
@@ -43,20 +44,33 @@ class Document(BaseModel):
4344
expire: datetime
4445
finished: Optional[datetime] = None
4546

46-
def ocr(self):
47+
def ocr(self, wsl: bool = False):
4748
self.status = "processing"
4849
self.processing = datetime.now()
4950
self.save_state()
51+
52+
# Hack for user using OCRmyPDF inside WSL (Windows)
53+
output_txt_path = (
54+
special_win_wslpath(self.output_txt)
55+
if wsl
56+
else str(self.output_txt.absolute())
57+
)
58+
input_path = (
59+
special_win_wslpath(self.input) if wsl else str(self.input.absolute())
60+
)
61+
output_path = (
62+
special_win_wslpath(self.output) if wsl else str(self.output.absolute())
63+
)
5064
try:
5165
output = subprocess.check_output(
5266
" ".join(
5367
[
5468
config.base_command_ocr,
5569
config.base_command_option,
5670
f"-l {'+'.join([l.value for l in self.lang])}",
57-
f"--sidecar {self.output_txt.resolve().relative_to(config.basedir).as_posix()}",
58-
self.input.resolve().relative_to(config.basedir).as_posix(),
59-
self.output.resolve().relative_to(config.basedir).as_posix(),
71+
f"--sidecar {output_txt_path}",
72+
input_path,
73+
output_path,
6074
]
6175
),
6276
stderr=subprocess.STDOUT,

api/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ class Settings(BaseSettings):
1212
base_command_option: str = "--output-type pdf --fast-web-view 0 --optimize 0"
1313
max_ocr_process: int = 1
1414
document_expire_hour: int = 1
15+
enable_wsl_compat: bool = False
1516

1617

1718
config = Settings()

api/tools.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,3 +10,10 @@ def save_upload_file(upload_file: UploadFile, destination: Path) -> None:
1010
shutil.copyfileobj(upload_file.file, buffer)
1111
finally:
1212
upload_file.file.close()
13+
14+
15+
def special_win_wslpath(path: Path) -> str:
16+
"""
17+
This is a special function returning a compatible path for user of OCRmyPDF inside WSL
18+
"""
19+
return f"`wslpath -a '{str(path)}'`"

tests/test_models.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -48,13 +48,9 @@ def test_ocr(self, monkeypatch, tmp_path, document_model, subprocess_check_outpu
4848
api.settings.config.base_command_ocr,
4949
api.settings.config.base_command_option,
5050
f"-l {'+'.join([l.value for l in document.lang])}",
51-
f"--sidecar {document.output_txt.resolve().relative_to(api.settings.config.basedir).as_posix()}",
52-
document.input.resolve()
53-
.relative_to(api.settings.config.basedir)
54-
.as_posix(),
55-
document.output.resolve()
56-
.relative_to(api.settings.config.basedir)
57-
.as_posix(),
51+
f"--sidecar {str(document.output_txt.absolute())}",
52+
str(document.input.absolute()),
53+
str(document.output.absolute()),
5854
]
5955
),
6056
stderr=subprocess.STDOUT,

0 commit comments

Comments
 (0)