Open
Description
# example.py
import scrapy
from playwright.async_api import Page
class ExampleSpider(scrapy.Spider):
    """Minimal spider that loads one page through Playwright and screenshots it.

    The spider issues a single request to ``https://example.org`` with the
    ``playwright`` and ``playwright_include_page`` meta keys enabled, then
    uses the page object found in the response meta to save a screenshot.
    """

    name = "example"
    custom_settings = {
        "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
        "DOWNLOAD_HANDLERS": {
            "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
        },
        # private setting, used for development & testing
        "_PLAYWRIGHT_THREADED_LOOP": True,
    }

    def start_requests(self):
        """Yield the only request of the crawl, flagged for Playwright handling."""
        # Both flags are needed: the first routes the request through
        # Playwright, the second asks for the page object in the response meta.
        playwright_meta = {"playwright": True, "playwright_include_page": True}
        yield scrapy.Request(url="https://example.org", meta=playwright_meta)

    async def parse(self, response):
        """Save a full-page screenshot of the response's page, then close it.

        The page is read from ``response.meta["playwright_page"]`` and the
        screenshot is written to ``example.png``.
        """
        playwright_page: Page = response.meta["playwright_page"]
        await playwright_page.screenshot(path="example.png", full_page=True)
        await playwright_page.close()
$ scrapy runspider example.py
(...)
2024-07-23 16:09:14 [scrapy.core.scraper] ERROR: Spider error processing <GET https://example.org> (referer: None)
Traceback (most recent call last):
File "/.../scrapy-playwright/venv-scrapy-playwright/lib/python3.10/site-packages/twisted/internet/defer.py", line 1248, in adapt
extracted: _SelfResultT | Failure = result.result()
File "/.../scrapy-playwright/examples/books.py", line 23, in parse
await page.screenshot(path="example.png", full_page=True)
File "/.../scrapy-playwright/venv-scrapy-playwright/lib/python3.10/site-packages/playwright/async_api/_generated.py", line 9334, in screenshot
await self._impl_obj.screenshot(
File "/.../scrapy-playwright/venv-scrapy-playwright/lib/python3.10/site-packages/playwright/_impl/_page.py", line 734, in screenshot
encoded_binary = await self._channel.send("screenshot", params)
File "/.../scrapy-playwright/venv-scrapy-playwright/lib/python3.10/site-packages/playwright/_impl/_connection.py", line 59, in send
return await self._connection.wrap_api_call(
File "/.../scrapy-playwright/venv-scrapy-playwright/lib/python3.10/site-packages/playwright/_impl/_connection.py", line 514, in wrap_api_call
raise rewrite_error(error, f"{parsed_st['apiName']}: {error}") from None
ValueError: Page.screenshot: The future belongs to a different loop than the one specified as the loop argument
2024-07-23 16:09:14 [scrapy.core.engine] INFO: Closing spider (finished)
(...)
This error only affects Windows; it is caused by the threaded loop implementation.