Skip to content

Commit f988c8e

Browse files
Creating Generate Screenshot button for Archives which are missing a screenshot.
1 parent c635267 commit f988c8e

File tree

11 files changed

+508
-4
lines changed

11 files changed

+508
-4
lines changed

app/src/Events.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,19 @@ function handleEvents(events) {
105105
eventToast('Archive Upload Failed!', message, 'error', 5000);
106106
}
107107

108+
if (event === 'screenshot_generated') {
109+
eventToast(
110+
'Screenshot Generated',
111+
message,
112+
'success',
113+
5000,
114+
() => window.open(url, '_self'));
115+
}
116+
117+
if (event === 'screenshot_generation_failed') {
118+
eventToast('Screenshot Generation Failed!', message, 'error', 5000);
119+
}
120+
108121
if (subject) {
109122
newestEvents[subject] = dt;
110123
}

app/src/api.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,28 @@ export async function getArchive(archiveId) {
750750
}
751751
}
752752

753+
/**
 * Ask the API to queue screenshot generation for an Archive and toast the outcome.
 *
 * @param archiveId - id of the Archive; interpolated into the request path.
 * @returns {Promise<boolean>} `true` when the response is ok, `false` otherwise.
 */
export async function generateArchiveScreenshot(archiveId) {
    const response = await apiPost(`${ARCHIVES_API}/${archiveId}/generate_screenshot`);

    // Failure path first: surface the server's error message (or the fallback text).
    if (!response.ok) {
        const description = await getErrorMessage(response, 'Failed to queue screenshot generation.');
        toast({
            type: 'error',
            title: 'Screenshot Generation Error',
            description,
            time: 5000,
        });
        return false;
    }

    // The request was accepted; the screenshot itself is produced later.
    toast({
        type: 'success',
        title: 'Screenshot Generation Queued',
        description: 'Screenshot generation has been queued. This may take a moment.',
        time: 3000,
    });
    return true;
}
774+
753775
export async function postDownload(downloadData) {
754776
if (!downloadData.downloader) {
755777
toast({

app/src/components/Archive.js

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import {
3535
textEllipsis,
3636
useTitle
3737
} from "./Common";
38-
import {deleteArchives, postDownload, tagFileGroup, untagFileGroup} from "../api";
38+
import {deleteArchives, generateArchiveScreenshot, postDownload, tagFileGroup, untagFileGroup} from "../api";
3939
import {Link, Route, Routes, useNavigate, useParams} from "react-router-dom";
4040
import Message from "semantic-ui-react/dist/commonjs/collections/Message";
4141
import {useArchive, useDomains, useSearchArchives, useSearchOrder} from "../hooks/customHooks";
@@ -122,6 +122,14 @@ function ArchivePage() {
122122
}
123123
}
124124

125+
// Queue screenshot generation for the displayed archive; re-fetch the archive once queuing succeeded.
const localGenerateScreenshot = async () => {
    const queued = await generateArchiveScreenshot(data.id);
    if (!queued) {
        return;
    }
    // Refresh the archive data after a short delay to show the new screenshot
    setTimeout(() => fetchArchive(), 2000);
}
132+
125133
const updateButton = <APIButton
126134
text='Update'
127135
color='green'
@@ -143,6 +151,17 @@ function ArchivePage() {
143151
>
144152
Delete
145153
</APIButton>;
154+
const generateScreenshotButton = !screenshotUrl ? <APIButton
155+
text='Generate Screenshot'
156+
color='yellow'
157+
confirmContent='Generate a screenshot for this archive?'
158+
confirmButton='Generate'
159+
onClick={localGenerateScreenshot}
160+
obeyWROLMode={true}
161+
style={{marginTop: '0.5em'}}
162+
>
163+
Generate Screenshot
164+
</APIButton> : null;
146165

147166
let historyList = <Loader active/>;
148167
if (history && history.length === 0) {
@@ -254,6 +273,7 @@ function ArchivePage() {
254273
{readButton}
255274
{updateButton}
256275
{deleteButton}
276+
{generateScreenshotButton}
257277
</Segment>
258278

259279
<Segment>

docker/archive/main.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import os.path
1111
import pathlib
1212
import subprocess
13+
import sys
1314
import tempfile
1415
import traceback
1516
from json import JSONDecodeError
@@ -33,6 +34,12 @@
3334
if not SINGLEFILE_PATH.is_file():
3435
raise FileNotFoundError("Can't find single-file executable!")
3536

37+
# Browser binary used to take screenshots.  Refuse to start without it: report on stderr and
# exit with status 1.
BROWSER_EXEC = pathlib.Path('/usr/bin/google-chrome')
if not BROWSER_EXEC.is_file():
    sys.stderr.write('Unable to find browser!\n')
    raise SystemExit(1)
41+
42+
3643
# Increase response timeout, archiving can take several minutes.
3744
RESPONSE_TIMEOUT = 10 * 60
3845
config = {
@@ -117,7 +124,7 @@ async def extract_readability(path: str, url: str) -> dict:
117124

118125

119126
async def take_screenshot(url: str) -> bytes:
120-
cmd = '/usr/bin/google-chrome' \
127+
cmd = f'{BROWSER_EXEC}' \
121128
' --headless' \
122129
' --disable-gpu' \
123130
' --no-sandbox' \
@@ -155,6 +162,61 @@ def prepare_bytes(b: bytes) -> str:
155162
return b
156163

157164

165+
@app.post('/screenshot')
async def post_screenshot(request: Request):
    """Generate a screenshot for the provided singlefile.

    The JSON body must contain `url` and is expected to contain `singlefile`: HTML that was gzipped and then
    base64-encoded.  The HTML is written to a temporary `.html` file which is screenshotted; when that produces
    nothing, the `url` itself is screenshotted instead.

    Responds with JSON `{url, screenshot}` on success, or `{error}` (including the traceback) on any failure.
    """
    body = request.json
    url = body['url']
    encoded_singlefile = body.get('singlefile')

    try:
        logger.info(f'Generating screenshot for {url}')

        # Decode and decompress the singlefile; a missing/empty value is passed through and rejected below.
        html = gzip.decompress(base64.b64decode(encoded_singlefile)) if encoded_singlefile else encoded_singlefile
        if not html:
            raise ValueError(f'No singlefile provided for {url}')

        image = None
        # Use html suffix so chrome screenshot recognizes it as an HTML file
        with tempfile.NamedTemporaryFile('wb', suffix='.html') as html_file:
            html_file.write(html)
            html_file.flush()

            # First choice: screenshot the local singlefile.
            try:
                image = await take_screenshot(f'file://{html_file.name}')
            except Exception as e:
                logger.error(f'Failed to take screenshot of {html_file.name}', exc_info=e)

            # Second choice: screenshot the URL.
            if not image:
                logger.warning(f'Failed to screenshot local singlefile, attempting to screenshot URL: {url}')
                try:
                    image = await take_screenshot(url)
                except Exception as e:
                    logger.error(f'Failed to take screenshot of {url}', exc_info=e)

            if not image:
                raise ValueError(f'Failed to generate screenshot for {url}')

        # Compress for smaller response
        return response.json(dict(url=url, screenshot=prepare_bytes(image)))
    except Exception as e:
        logger.error(f'Failed to generate screenshot for {url}', exc_info=e)
        error = str(traceback.format_exc())
        return response.json({'error': f'Failed to generate screenshot for {url} traceback is below... \n\n {error}'})
218+
219+
158220
@app.post('/json')
159221
async def post_archive(request: Request):
160222
url = request.json['url']

modules/archive/api.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99

1010
from wrolpi.api_utils import json_response, api_app
1111
from wrolpi.common import logger, wrol_mode_check, api_param_limiter, TRACE_LEVEL
12+
from wrolpi.db import get_db_session
1213
from wrolpi.errors import ValidationError
1314
from wrolpi.events import Events
15+
from wrolpi.files.lib import upsert_file
1416
from wrolpi.schema import JSONErrorResponse
1517
from wrolpi.switches import register_switch_handler, ActivateSwitchMethod
1618
from . import lib, schema
@@ -136,6 +138,49 @@ async def singlefile_upload_switch_handler(url=None):
136138
singlefile_upload_switch_handler: ActivateSwitchMethod
137139

138140

141+
@register_switch_handler('generate_screenshot_switch_handler')
async def generate_screenshot_switch_handler(archive_id=None):
    """Used by `post_generate_screenshot` to generate screenshots in the background.

    Takes one Archive id off the shared `archive_screenshots` queue, generates its screenshot and sends a
    success or failure Event.  The switch is re-activated afterwards, whether or not generation succeeded, so
    that a single failing Archive does not strand the requests queued behind it.

    The `archive_id` argument is only used for trace logging; the id that is processed always comes from the
    queue.

    Raises:
        Exception: whatever screenshot generation (or the follow-up Archive lookup) raised, after the failure
            Event has been sent.
    """
    q: multiprocessing.Queue = api_app.shared_ctx.archive_screenshots

    trace_enabled = logger.isEnabledFor(TRACE_LEVEL)
    if trace_enabled:
        logger.trace(f'generate_screenshot_switch_handler called for archive_id={archive_id}')
    try:
        archive_id = q.get_nowait()
    except queue.Empty:
        if trace_enabled:
            logger.trace('generate_screenshot_switch_handler called on empty queue')
        return

    try:
        q_size = q.qsize()
    except NotImplementedError:
        # qsize() is not implemented on macOS
        q_size = '?'
    logger.info(f'generate_screenshot_switch_handler queue size: {q_size}')

    try:
        await lib.generate_archive_screenshot(archive_id)
        # Always send success event since exceptions are raised on failure
        archive = lib.get_archive(archive_id=archive_id)
        location = archive.location
        name = archive.file_group.title or archive.file_group.url
        logger.info(f'Generated screenshot for Archive ({q_size}): {archive_id}')
        Events.send_screenshot_generated(f'Generated screenshot for: {name}', url=location)
    except Exception as e:
        logger.error(f'generate_screenshot_switch_handler failed for Archive {archive_id}', exc_info=e)
        Events.send_screenshot_generation_failed(f'Failed to generate screenshot: {e}')
        raise
    finally:
        # Call this function again so any new screenshot requests can be processed.  This runs on failure
        # too: the failed id has already been removed from the queue, so it is not retried.
        generate_screenshot_switch_handler.activate_switch()
179+
180+
181+
generate_screenshot_switch_handler: ActivateSwitchMethod
182+
183+
139184
@archive_bp.post('/upload')
140185
@openapi.definition(
141186
summary='Upload SingleFile from SingleFile browser extension and convert it to an Archive.'
@@ -153,3 +198,28 @@ async def post_upload_singlefile(request: Request):
153198
singlefile_upload_switch_handler.activate_switch(context=dict(url=url))
154199
# Return empty json response because SingleFile extension expects a JSON response.
155200
return json_response(dict(), status=HTTPStatus.OK)
201+
202+
203+
@archive_bp.post('/<archive_id:int>/generate_screenshot')
@openapi.description('Generate a screenshot for an Archive that does not have one')
@openapi.response(HTTPStatus.OK, description='Screenshot generation queued')
@openapi.response(HTTPStatus.NOT_FOUND, JSONErrorResponse)
@openapi.response(HTTPStatus.BAD_REQUEST, JSONErrorResponse)
@wrol_mode_check
async def post_generate_screenshot(_: Request, archive_id: int):
    """Queue a screenshot generation request for an Archive.

    Responds 404 when the Archive lookup raises, 400 when the Archive has no singlefile, and 200 once the id
    has been put on the shared queue and the background switch has been activated.
    """
    # The Archive must exist; any lookup error is reported as "not found".
    try:
        archive = lib.get_archive(archive_id=archive_id)
    except Exception:
        not_found = {'error': f'Archive {archive_id} not found'}
        return json_response(not_found, status=HTTPStatus.NOT_FOUND)

    # The screenshot is rendered from the singlefile, so it is required.
    if not archive.singlefile_path:
        no_singlefile = {'error': 'Archive has no singlefile'}
        return json_response(no_singlefile, status=HTTPStatus.BAD_REQUEST)

    # Hand the work to the background handler.
    logger.info(f'Queueing screenshot generation for Archive {archive_id}')
    pending = api_app.shared_ctx.archive_screenshots
    pending.put(archive_id)
    generate_screenshot_switch_handler.activate_switch(context=dict(archive_id=archive_id))

    queued = {'message': 'Screenshot generation queued'}
    return json_response(queued, status=HTTPStatus.OK)

modules/archive/lib.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,42 @@ async def request_archive(url: str, singlefile: str = None) -> Tuple[str, Option
156156
return singlefile, readability, screenshot
157157

158158

159+
async def request_screenshot(url: str, singlefile_path: pathlib.Path) -> Optional[bytes]:
    """Send a request to the archive service to generate a screenshot from the singlefile.

    The singlefile is read from disk, gzipped and base64-encoded before being posted together with `url`.

    Returns:
        The decoded and decompressed screenshot bytes, or `None` when the service's response carries no
        screenshot (including an empty/null JSON body).

    Raises:
        Exception: when the service reports an `error`, or when the request itself fails.  The error is logged
            and re-raised.
    """
    logger.info(f'Sending screenshot request to archive service: {url}')

    # Read, compress, and encode the singlefile
    singlefile_contents = singlefile_path.read_bytes()
    singlefile_compressed = gzip.compress(singlefile_contents)
    singlefile_b64 = base64.b64encode(singlefile_compressed).decode()

    data = dict(url=url, singlefile=singlefile_b64)
    try:
        async with aiohttp_post(f'{ARCHIVE_SERVICE}/screenshot', json_=data, timeout=ARCHIVE_TIMEOUT) as response:
            status = response.status
            contents = await response.json()
            if contents and (error := contents.get('error')):
                # Report the error from the archive service.
                raise Exception(f'Received error from archive service: {error}')

            # Compressed base64.  `contents` may be empty/None (the check above allows for it), so guard the
            # lookup instead of failing with AttributeError.
            screenshot = contents.get('screenshot') if contents else None
            if not screenshot:
                logger.warning(f'Failed to get screenshot for {url=}')
                return None

            logger.debug(f'screenshot request status code {status}')
    except Exception as e:
        logger.error('Error when requesting screenshot', exc_info=e)
        raise

    # Decode and decompress.
    screenshot = base64.b64decode(screenshot)
    screenshot = gzip.decompress(screenshot)

    return screenshot
193+
194+
159195
async def model_archive_result(url: str, singlefile: str, readability: dict, screenshot: bytes) -> Archive:
160196
"""
161197
Convert results from ArchiveDownloader into real files. Create Archive record.
@@ -625,3 +661,74 @@ async def singlefile_to_archive(singlefile: bytes) -> Archive:
625661
logger.trace(f'singlefile_to_archive modeling: {url}')
626662
archive: Archive = await model_archive_result(url, singlefile, readability, screenshot)
627663
return archive
664+
665+
666+
async def generate_archive_screenshot(archive_id: int) -> pathlib.Path:
    """
    Create the screenshot file for an existing Archive which lacks one.

    When the Archive already points at a screenshot that exists on disk, no new screenshot is taken; the
    existing file is (re-)attached to the Archive's FileGroup and its path is returned.  When the recorded
    screenshot file is missing, a new one is generated.

    Returns the path of the screenshot.

    Raises:
        ValueError: If Archive has no singlefile
        RuntimeError: If screenshot generation fails
    """
    from wrolpi.db import get_db_session

    with get_db_session() as session:
        archive = Archive.find_by_id(archive_id, session=session)

        if not archive.singlefile_path:
            raise ValueError(f'Cannot generate screenshot for Archive {archive_id}: no singlefile')

        # An Archive may already reference a screenshot.
        if archive.screenshot_path:
            if not archive.screenshot_path.is_file():
                # Recorded, but the file is gone from disk.
                logger.warning(f'Archive {archive_id} has screenshot_path but file does not exist, regenerating')
            else:
                logger.info(f'Archive {archive_id} already has a screenshot, ensuring it is tracked')
                # Make sure FileGroup.files and FileGroup.data both know about the screenshot.
                with get_db_session(commit=True) as tracking_session:
                    archive = Archive.find_by_id(archive_id, session=tracking_session)
                    file_group = archive.file_group
                    # append_files uses unique_by_predicate, so this is safe even if already tracked
                    file_group.append_files(archive.screenshot_path)

                    # Also update FileGroup.data (same pattern as set_screenshot)
                    data = dict(file_group.data or {})
                    data['screenshot_path'] = str(archive.screenshot_path)
                    file_group.data = data

                    archive.validate()
                    tracking_session.flush()
                    return archive.screenshot_path

        html_path = archive.singlefile_path
        url = archive.file_group.url

    # Generate locally, or ask the Archive docker service.
    if not DOCKERIZED:
        logger.debug(f'Generating screenshot locally for Archive {archive_id}')
        png_bytes = html_screenshot(html_path.read_bytes())
    else:
        logger.debug(f'Requesting screenshot from archive service for Archive {archive_id}')
        png_bytes = await request_screenshot(url, html_path)

    if not png_bytes:
        raise RuntimeError(f'Failed to generate screenshot for Archive {archive_id}')

    # The screenshot lives next to the singlefile: same name, `.png` suffix.
    png_path = html_path.with_suffix('.png')
    png_path.write_bytes(png_bytes)
    logger.info(f'Generated screenshot for Archive {archive_id}: {png_path}')

    # Record the new file on the Archive.
    with get_db_session(commit=True) as session:
        archive = Archive.find_by_id(archive_id, session=session)
        archive.set_screenshot(png_path)
        session.flush()

    return png_path

0 commit comments

Comments
 (0)