Skip to content

Commit 2a65689

Browse files
committed
chore: add sample generator and test_inputs (TXT, DOCX) for QA
1 parent 93d751d commit 2a65689

File tree

6 files changed

+101
-0
lines changed

6 files changed

+101
-0
lines changed

WORKLOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,8 @@
1616
- README: added CLI stdin/stdout examples and new flags
1717
- Wiki Usage: documented flags and stdin/stdout example
1818

19+
2025-11-03T01:00:00Z — Test inputs
20+
- Added generator script scripts/make_samples.py
21+
- Created TXT and DOCX samples in test_inputs/
22+
1923

scripts/make_samples.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/env python3
2+
import os
3+
from pathlib import Path
4+
import zipfile
5+
6+
# Repository root: this script lives one directory below it.
ROOT = Path(__file__).resolve().parents[1]
# Directory that receives every generated sample file.
OUT = ROOT.joinpath('test_inputs')
8+
9+
10+
def write_txt_samples():
    """Create the plain-text sample files inside ``OUT``.

    Three UTF-8 files are written: a two-page document with page markers and
    a hyphenated line break, a bullet list followed by a small table, and a
    multilingual (Russian / Japanese) document.
    """
    samples = {
        'sample1_basic.txt': (
            'Page 1 of 2\n'
            'Confidential\n'
            '\n'
            'Introduction\n'
            'This is auto-\n'
            'matic text.\n'
            '\n'
            '1\n'
            '\n'
            'Page 2 of 2\n'
            'DRAFT\n'
            'Main content here.\n'
        ),
        'sample2_lists_tables.txt': (
            '- Item one\n'
            '- Item two\n'
            '\n'
            'Name Age City\n'
            'Alice 30 Paris\n'
            'Bob 22 Berlin\n'
        ),
        'sample3_multilingual.txt': (
            'Страница 1 из 1\n'
            'Заголовок\n'
            '\n'
            'Привет, мир!\n'
            'こんにちは 世界\n'
        ),
    }

    OUT.mkdir(exist_ok=True)
    for filename, content in samples.items():
        target = OUT / filename
        target.write_text(content, encoding='utf-8')
24+
25+
26+
def make_minimal_docx(path: Path, text: str) -> None:
    """Write a minimal DOCX package containing *text* to *path*.

    The archive holds only the four parts written below: the content-types
    manifest, the package relationships, an empty document relationships
    part, and ``word/document.xml``.  Each ``'\\n'``-separated line of *text*
    becomes one paragraph with a single run; empty lines become empty
    paragraphs.

    Args:
        path: Destination ``.docx`` file.  Missing parent directories are
            created.
        text: Plain text to embed.  XML special characters (``&``, ``<``,
            ``>``) are escaped so that arbitrary text yields well-formed XML.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    from xml.sax.saxutils import escape

    # parents=True so a nested, not-yet-existing output directory works too.
    path.parent.mkdir(parents=True, exist_ok=True)

    def para(t):
        # Escape the text: raw '&' or '<' would make document.xml malformed.
        return f'<w:p><w:r><w:t>{escape(t)}</w:t></w:r></w:p>'

    with zipfile.ZipFile(path, 'w') as z:
        z.writestr('[Content_Types].xml', (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
            '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
            '<Default Extension="xml" ContentType="application/xml"/>'
            '<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
            '</Types>'
        ))
        z.writestr('_rels/.rels', (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
            '<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="/word/document.xml"/>'
            '</Relationships>'
        ))
        z.writestr('word/_rels/document.xml.rels', (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships" />'
        ))
        # Very simple document.xml: one paragraph per input line.
        body = ''.join(para(p) for p in text.split('\n'))
        document_xml = (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
            f'<w:body>{body}</w:body>'
            '</w:document>'
        )
        z.writestr('word/document.xml', document_xml)
58+
59+
60+
def write_docx_sample():
    """Create the single DOCX sample, ``sample4_simple.docx``, inside ``OUT``."""
    destination = OUT / 'sample4_simple.docx'
    sample_text = (
        'Page 1\n'
        'Confidential\n'
        '\n'
        'Title\n'
        'This is auto-\n'
        'matic text in DOCX.'
    )
    make_minimal_docx(destination, sample_text)
63+
64+
65+
def main():
    """Generate all sample files, then print the output directory."""
    # Order matters only for readability of the output directory listing;
    # TXT samples are produced first, then the DOCX sample.
    for generate in (write_txt_samples, write_docx_sample):
        generate()
    print(f"Samples written to {OUT}")


# Run the generator only when executed as a script, not on import.
if __name__ == '__main__':
    main()
73+
74+

test_inputs/sample1_basic.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
Page 1 of 2
2+
Confidential
3+
4+
Introduction
5+
This is auto-
6+
matic text.
7+
8+
1
9+
10+
Page 2 of 2
11+
DRAFT
12+
Main content here.

test_inputs/sample2_lists_tables.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
- Item one
2+
- Item two
3+
4+
Name Age City
5+
Alice 30 Paris
6+
Bob 22 Berlin

test_inputs/sample3_multilingual.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Страница 1 из 1
2+
Заголовок
3+
4+
Привет, мир!
5+
こんにちは 世界

test_inputs/sample4_simple.docx

1.72 KB
Binary file not shown.

0 commit comments

Comments
 (0)