diff --git a/scrapy_price_monitor/.gitignore b/scrapy_price_monitor/.gitignore index 57c0c1e..872c981 100644 --- a/scrapy_price_monitor/.gitignore +++ b/scrapy_price_monitor/.gitignore @@ -8,7 +8,6 @@ __pycache__/ # Distribution / packaging .Python -env/ build/ develop-eggs/ dist/ @@ -20,9 +19,13 @@ lib64/ parts/ sdist/ var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg +MANIFEST # PyInstaller # Usually these files are written by a python script from a template @@ -37,13 +40,16 @@ pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ +.nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml -*,cover +*.cover +*.py,cover .hypothesis/ +.pytest_cache/ # Translations *.mo @@ -52,6 +58,8 @@ coverage.xml # Django stuff: *.log local_settings.py +db.sqlite3 +db.sqlite3-journal # Flask stuff: instance/ @@ -66,27 +74,58 @@ docs/_build/ # PyBuilder target/ -# IPython Notebook +# Jupyter Notebook .ipynb_checkpoints +# IPython +profile_default/ +ipython_config.py + # pyenv .python-version -# celery beat schedule file +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff celerybeat-schedule +celerybeat.pid -# dotenv -.env +# SageMath parsed files +*.sage.py -# virtualenv -.venv/ +# Environments +.env +.venv +env/ venv/ ENV/ +env.bak/ +venv.bak/ # Spyder project settings .spyderproject +.spyproject # Rope project settings .ropeproject -.scrapy \ No newline at end of file +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +.idea diff --git a/scrapy_price_monitor/README.md b/scrapy_price_monitor/README.md index dc32238..abb2aff 100644 --- a/scrapy_price_monitor/README.md +++ b/scrapy_price_monitor/README.md @@ -2,7 +2,8 @@ Scrapy Price Monitor ==================== This is a simple price monitor built with [Scrapy](https://github.com/scrapy/scrapy) -and [Scrapy Cloud](https://scrapinghub.com/scrapy-cloud). +and [Scrapy Cloud](https://www.zyte.com/scrapy-cloud/). It is an updated version of +[this sample](https://github.com/scrapinghub/sample-projects/tree/master/scrapy_price_monitor/_scrapy_price_monitor_OLD). It is basically a Scrapy project with one spider for each online retailer that we want to monitor prices from. In addition to the spiders, there's a Python @@ -19,11 +20,6 @@ the already supported retailers, just add a new key for that product and add the URL list as its value, such as: { - "headsetlogitech": [ - "https://www.amazon.com/.../B005GTO07O/", - "http://www.bestbuy.com/.../3436118.p", - "http://www.ebay.com/.../110985874014" - ], "NewProduct": [ "http://url.for.retailer.x", "http://url.for.retailer.y", @@ -34,36 +30,26 @@ the URL list as its value, such as: ## Supporting Further Retailers -This project currently only works with 3 online retailers, and you can list them -running: +To add a retailer, just create a spider to handle the product pages from it. 
+To include a spider for fake-website.com, you could run: - $ scrapy list - amazon.com - bestbuy.com - ebay.com + $ scrapy genspider fake-website.com fake-website.com -If the retailer that you want to monitor is not yet supported, just create a spider -to handle the product pages from it. To include a spider for samsclub.com, you -could run: +Then open the newly created `fake_website_com.py` file in your editor and add the extraction rules. - $ scrapy genspider samsclub.com samsclub.com - -And then, open the spider and add the extraction rules: - - $ scrapy edit samsclub.com - -Have a look at the current spiders and implement the new ones using the same +Have a look at the sample books.toscrape.com spider and implement the new ones using the same structure, subclassing `BaseSpider` instead of `scrapy.Spider`. This way, your spiders will automatically read the URLs list from `resources/urls.json`. ## Customizing the Price Monitor -The price monitor sends an email using Amazon SES service, so to run it you -have to set both `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` variables in -`price_monitor/settings.py`. If you want to use another email service, -you have to rewrite the `send_email_alert` function in -`price_monitor/bin/monitor.py`. +The price monitor script uses a `send_alert` function in the `price_monitor/bin/alert.py` +file to send an alert. The current sample sends an email using the Amazon SES +service, so to run it you have to set both `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` +variables in the file, along with details for the email sender and intended recipient. +If you want to use another email service or another form of alert altogether, +you can rewrite this file and include an equivalent `send_alert` function. The price monitor can be further customized via parameters to the `price_monitor/bin/monitor.py` script. We will dig on those parameters @@ -74,17 +60,17 @@ later when showing how to schedule the project on Scrapy Cloud. ## Running in your environment 1. Clone this repo: - $ git clone git@github.com:stummjr/scrapy_price_monitor.git + $ git clone git@github.com:scrapinghub/sample-projects.git 2. Enter the folder and install the project dependencies: $ cd scrapy_price_monitor $ pip install -r requirements.txt -3. Create a free forever account on Scrapy Cloud: -https://app.scrapinghub.com/account/signup/. +3. Create an account on Zyte: +https://app.zyte.com/ -4. Create a Scrapy project on Scrapy Cloud and copy the project id from the project URL. +4. Scroll to Scrapy Cloud Projects, select Create Project, and take note of the project ID in the new project's URL. 5. Install [Scrapinghub command line tool (shub)](https://github.com/scrapinghub/shub): @@ -141,9 +127,9 @@ To do that, first add your Scrapy Cloud project id to [settings.py `SHUB_PROJ_ID Then run the spiders via command line: - $ scrapy crawl bestbuy.com + $ scrapy crawl books.toscrape.com -This will run the spider named as `bestbuy.com` and store the scraped data into +This will run the spider named `books.toscrape.com` and store the scraped data into a Scrapy Cloud collection, under the project you set in the last step. 
You can also run the price monitor via command line: diff --git a/scrapy_price_monitor/bin/__init__.py b/scrapy_price_monitor/bin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scrapy_price_monitor/bin/alert.py b/scrapy_price_monitor/bin/alert.py new file mode 100644 index 0000000..39e239f --- /dev/null +++ b/scrapy_price_monitor/bin/alert.py @@ -0,0 +1,30 @@ +# Below is sample code for sending alerts via the Amazon SES email service +# If you wish to alert through another means such as Slack, text, etc., replace this section with the appropriate code + +import boto +from jinja2 import Environment, PackageLoader + +from w3lib.html import remove_tags +import logging +logger = logging.getLogger(__name__) + +jinja_env = Environment(loader=PackageLoader('price_monitor', 'alert_template')) + +# settings for Amazon SES email service +AWS_ACCESS_KEY = 'AWS_ACCESS_KEY' +AWS_SECRET_KEY = 'AWS_SECRET_KEY' +EMAIL_ALERT_FROM = 'Price Monitor ' +EMAIL_ALERT_TO = ['RECEIVER_EMAIL@provider.com'] + + +def send_alert(items): + ses = boto.connect_ses(AWS_ACCESS_KEY, AWS_SECRET_KEY) + html_body = jinja_env.get_template('email.html').render(items=items) + + ses.send_email( + EMAIL_ALERT_FROM, + 'Price drop alert', + remove_tags(html_body), + EMAIL_ALERT_TO, + html_body=html_body + ) \ No newline at end of file diff --git a/scrapy_price_monitor/bin/monitor.py b/scrapy_price_monitor/bin/monitor.py index a9dc370..1be49ef 100644 --- a/scrapy_price_monitor/bin/monitor.py +++ b/scrapy_price_monitor/bin/monitor.py @@ -4,14 +4,10 @@ import os from datetime import datetime, timedelta -import boto -from hubstorage import HubstorageClient -from jinja2 import Environment, PackageLoader from price_monitor import settings from price_monitor.utils import get_product_names, get_retailers_for_product -from w3lib.html import remove_tags - -jinja_env = Environment(loader=PackageLoader('price_monitor', 'templates')) +from price_monitor.collection_helper import CollectionHelper +from bin.alert import send_alert class DealsChecker(object): @@ -42,8 +38,13 @@ class DealsFetcher(object): def __init__(self, product_name, apikey, project_id, hours): self.product_name = product_name - project = HubstorageClient(apikey).get_project(project_id) - self.item_store = project.collections.new_store(product_name) + collection = CollectionHelper( + proj_id=project_id, + collection_name=product_name, + api_key=apikey, + create=True, + ) + self.item_store = collection.store self.load_items_from_last_n_hours(hours) def load_items_from_last_n_hours(self, n=24): @@ -75,21 +76,8 @@ def get_deals(self): return latest_deals, previous_deals -def send_email_alert(items): - ses = boto.connect_ses(settings.AWS_ACCESS_KEY, settings.AWS_SECRET_KEY) - html_body = jinja_env.get_template('email.html').render(items=items) - - ses.send_email( - settings.EMAIL_ALERT_FROM, - 'Price drop alert', - remove_tags(html_body), - settings.EMAIL_ALERT_TO, - html_body=html_body - ) - - def main(args): items = [] for prod_name in get_product_names(): fetcher = DealsFetcher(prod_name, args.apikey, args.project, args.days * 24) checker = DealsChecker(*fetcher.get_deals(), args.threshold) @@ -98,7 +86,7 @@ def main(args): items.append(best_deal) if items: - send_email_alert(items) + send_alert(items) def parse_args(): diff --git a/scrapy_price_monitor/price_monitor/templates/email.html b/scrapy_price_monitor/price_monitor/alert_template/email.html similarity index 99% rename from scrapy_price_monitor/price_monitor/templates/email.html rename to 
scrapy_price_monitor/price_monitor/alert_template/email.html index c51ef0c..af44913 100644 --- a/scrapy_price_monitor/price_monitor/templates/email.html +++ b/scrapy_price_monitor/price_monitor/alert_template/email.html @@ -11,4 +11,3 @@

🎉 Hey, we found a good deal! 🎁

{% endfor %} - diff --git a/scrapy_price_monitor/price_monitor/collection_helper.py b/scrapy_price_monitor/price_monitor/collection_helper.py new file mode 100644 index 0000000..2134118 --- /dev/null +++ b/scrapy_price_monitor/price_monitor/collection_helper.py @@ -0,0 +1,91 @@ +import scrapinghub + + +class CollectionHelper: + """Adapter to make interacting with a Scrapy Cloud collection easier""" + def __init__(self, proj_id, collection_name, api_key=None, create=False): + sh_client = scrapinghub.ScrapinghubClient(api_key) + project = sh_client.get_project(proj_id) + collections = project.collections + self.store = collections.get_store(collection_name) + self.writer = self.store.create_writer() + if create: + # a store is created by writing into it + # if create is True, write and then delete a placeholder + self.store.set({'_key': 'placeholder', 'value': 123}) + self.delete(['placeholder']) + + def get(self, key, default=None): + """ + Get the value stored at key + Args: + key: The key to search for + default: What to return if the key is not present + Returns: + The value of the item with that key in the collection, or the default if not present. + """ + # Use the .list method here because .get only returns bytes. + search = self.store.list([key]) + if not search: + return default + return search[0]['value'] + + def set(self, key, value, flush=False): + """ + Set value at key + Args: + key: The key for the item + value: The value for the item + flush(bool): Whether to flush the writer + """ + self.writer.write({'_key': key, 'value': value}) + if flush: + # This is using a batch writer and will not write if the batch isn't filled + # The flush option will flush the writer, causing anything in the current batch to be written + self.flush_writer() + + def delete(self, keys): + """ + Delete keys from store. 
+ Args: + keys(list): List of keys to delete + """ + self.store.delete(keys) + + def flush_writer(self): + """ + Flush the writer + """ + self.writer.flush() + + def iter_items(self): + """ + Create an iterable over all items in the collection + Returns(generator) + """ + return self.store.iter() + + def list_items(self): + """ + Create a list of all items in the collection + Returns(list) + """ + return list(self.iter_items()) + + def list_keys(self): + """ + Get a list of all keys in the collection + Returns(list) + """ + items_generator = self.iter_items() + keys = [i['_key'] for i in items_generator] + return keys + + def list_values(self): + """ + Get a list of all values in the collection + Returns(list) + """ + items_generator = self.iter_items() + values = [i['value'] for i in items_generator] + return values \ No newline at end of file diff --git a/scrapy_price_monitor/price_monitor/items.py b/scrapy_price_monitor/price_monitor/items.py index 20a91f9..03b4a5b 100644 --- a/scrapy_price_monitor/price_monitor/items.py +++ b/scrapy_price_monitor/price_monitor/items.py @@ -1,14 +1,24 @@ -# -*- coding: utf-8 -*- +from scrapy import Item, Field +from scrapy.loader import ItemLoader +from itemloaders.processors import TakeFirst, MapCompose +from price_parser import Price + + +class Product(Item): + url = Field() + title = Field() + price = Field() + product_name = Field() + retailer = Field() + + +class ProductLoader(ItemLoader): + default_item_class = Product + default_output_processor = TakeFirst() + + title_in = MapCompose(lambda x: x.strip()) + price_in = MapCompose(lambda x: Price.fromstring(x).amount_float) -# Define here the models for your scraped items -# -# See documentation in: -# http://doc.scrapy.org/en/latest/topics/items.html -import scrapy -class PriceMonitorItem(scrapy.Item): - # define the fields for your item here like: - # name = scrapy.Field() - pass diff --git a/scrapy_price_monitor/price_monitor/pipelines.py b/scrapy_price_monitor/price_monitor/pipelines.py index 18de561..3c8b4b2 100644 --- a/scrapy_price_monitor/price_monitor/pipelines.py +++ b/scrapy_price_monitor/price_monitor/pipelines.py @@ -1,21 +1,28 @@ -# -*- coding: utf-8 -*- from price_monitor import settings -from hubstorage import HubstorageClient +from price_monitor.collection_helper import CollectionHelper from price_monitor.utils import reversed_timestamp, get_product_names -class CollectionStoragePipeline(object): - +class CollectionStoragePipeline: def open_spider(self, spider): - client = HubstorageClient(auth=settings.SHUB_KEY) - project = client.get_project(settings.SHUB_PROJ_ID) self.data_stores = {} for product_name in get_product_names(): - self.data_stores[product_name] = project.collections.new_store(product_name) + store = CollectionHelper( + proj_id=settings.SHUB_PROJ_ID, + collection_name=product_name, + api_key=settings.SHUB_KEY, + create=True, + ) + self.data_stores[product_name] = store def process_item(self, item, spider): key = "{}-{}-{}".format( reversed_timestamp(), item.get('product_name'), item.get('retailer') ) - self.data_stores[item['product_name']].set({'_key': key, 'value': item}) + store = self.data_stores[item['product_name']] + store.set(key, item) return item + + def close_spider(self, spider): + for store in self.data_stores.values(): + store.flush_writer() diff --git a/scrapy_price_monitor/price_monitor/resources/urls.json index 6bc20ba..046257f 100644 --- a/scrapy_price_monitor/price_monitor/resources/urls.json 
+++ b/scrapy_price_monitor/price_monitor/resources/urls.json @@ -1,27 +1,17 @@ { - "headsetlogitech": [ - "https://www.amazon.com/Logitech-Wireless-Headset-Over-Design/dp/B005GTO07O/", - "http://www.bestbuy.com/site/logitech-h600-wireless-headset-black/3436118.p", - "http://www.ebay.com/itm/N-Logitech-Wireless-Headset-H600-Over-The-Head-Design-981-000341-/110985874014" + "alightintheattic": [ + "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html", + "https://fakelink" ], - "webcamlogitech": [ - "https://www.amazon.com/Logitech-Widescreen-Calling-Recording-Desktop/dp/B006JH8T3S/", - "http://www.bestbuy.com/site/logitech-c920-pro-webcam-black/4612476.p?skuId=4612476", - "http://www.ebay.com/itm/Logitech-HD-Pro-Webcam-C920-1080p-Widescreen-Video-Calling-and-Recording-/272381890214" + "shakespearessonnets": [ + "https://books.toscrape.com/catalogue/shakespeares-sonnets_989/index.html", + "https://fakelink" ], - "amazonechodot": [ - "https://www.amazon.com/dp/B01DFKC2SO", - "http://www.bestbuy.com/site/amazon-echo-dot/5578851.p?skuId=5578851", - "http://www.ebay.com/itm/Amazon-Echo-Dot-2nd-Generation-w-Alexa-Voice-Media-Device-All-New-2016-/201668562192" + "soumission": [ + "https://books.toscrape.com/catalogue/soumission_998/index.html", + "https://fakelink" ], - "nikoncoolpix": [ - "https://www.amazon.com/Nikon-COOLPIX-B500-Digital-Camera/dp/B01C3LEE9G/", - "http://www.bestbuy.com/site/nikon-coolpix-b500-16-0-megapixel-digital-camera-red/4997500.p?skuId=4997500", - "http://www.ebay.com/itm/Nikon-COOLPIX-B500-Digital-Camera-Red-/162225974018" - ], - "bluemicrophone": [ - "https://www.amazon.com/Blue-Snowball-iCE-Condenser-Microphone/dp/B014PYGTUQ/", - "http://www.bestbuy.com/site/blue-microphones-snowball-usb-cardioid-and-omnidirectional-electret-condenser-vocal-microphone-black/9918056.p?skuId=9918056", - "http://www.ebay.com/itm/Blue-Microphones-Snowball-Black-iCE-Condenser-Microphone-/172260373002" + "other_product": [ + "https://fakelink" ] } diff --git a/scrapy_price_monitor/price_monitor/settings.py b/scrapy_price_monitor/price_monitor/settings.py index 9888b56..0632144 100644 --- a/scrapy_price_monitor/price_monitor/settings.py +++ b/scrapy_price_monitor/price_monitor/settings.py @@ -1,27 +1,14 @@ -# -*- coding: utf-8 -*- import os - BOT_NAME = 'price_monitor' + SPIDER_MODULES = ['price_monitor.spiders'] NEWSPIDER_MODULE = 'price_monitor.spiders' -ROBOTSTXT_OBEY = True - -SHUB_KEY = os.getenv('$SHUB_KEY') -# if you want to run it locally, replace '999999' by your Scrapy Cloud project ID below -SHUB_PROJ_ID = os.getenv('SHUB_JOBKEY', '999999').split('/')[0] +# if you want to run it locally, replace None with your scrapy cloud API key +SHUB_KEY = None +# if you want to run it locally, replace PROJ_ID by your Scrapy Cloud project ID +SHUB_PROJ_ID = os.getenv('SHUB_JOBKEY', 'PROJ_ID').split('/')[0] - -# settings for Amazon SES email service -AWS_ACCESS_KEY = os.getenv('$AWS_ACCESS_KEY') -AWS_SECRET_KEY = os.getenv('$AWS_SECRET_KEY') -EMAIL_ALERT_FROM = 'Price Monitor ' -EMAIL_ALERT_TO = ['RECEIVER_EMAIL@provider.com'] - -# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html ITEM_PIPELINES = { 'price_monitor.pipelines.CollectionStoragePipeline': 400, } - -AUTOTHROTTLE_ENABLED = True -# HTTPCACHE_ENABLED = True diff --git a/scrapy_price_monitor/price_monitor/spiders/base_spider.py b/scrapy_price_monitor/price_monitor/spiders/_base.py similarity index 87% rename from scrapy_price_monitor/price_monitor/spiders/base_spider.py rename to 
scrapy_price_monitor/price_monitor/spiders/_base.py index e726c9c..dfc2ad5 100644 --- a/scrapy_price_monitor/price_monitor/spiders/base_spider.py +++ b/scrapy_price_monitor/price_monitor/spiders/_base.py @@ -11,6 +11,6 @@ def start_requests(self): for name, urls in products.items(): for url in urls: if self.name in url: - now = datetime.now().strftime('%Y/%m/%d %H:%M:%S') + now = datetime.now().isoformat() item = {'product_name': name, 'retailer': self.name, 'when': now} yield scrapy.Request(url, meta={'item': item}) diff --git a/scrapy_price_monitor/price_monitor/spiders/amazon.py b/scrapy_price_monitor/price_monitor/spiders/amazon.py deleted file mode 100644 index 0f3ec3c..0000000 --- a/scrapy_price_monitor/price_monitor/spiders/amazon.py +++ /dev/null @@ -1,14 +0,0 @@ -from .base_spider import BaseSpider - - -class AmazonSpider(BaseSpider): - name = "amazon.com" - - def parse(self, response): - item = response.meta.get('item', {}) - item['url'] = response.url - item['title'] = response.css("span#productTitle::text").extract_first("").strip() - item['price'] = float( - response.css("span#priceblock_ourprice::text").re_first("\$(.*)") or 0 - ) - yield item diff --git a/scrapy_price_monitor/price_monitor/spiders/bestbuy.py b/scrapy_price_monitor/price_monitor/spiders/bestbuy.py deleted file mode 100644 index 03c49f6..0000000 --- a/scrapy_price_monitor/price_monitor/spiders/bestbuy.py +++ /dev/null @@ -1,14 +0,0 @@ -from .base_spider import BaseSpider - - -class BestbuySpider(BaseSpider): - name = "bestbuy.com" - - def parse(self, response): - item = response.meta.get('item', {}) - item['url'] = response.url - item['title'] = response.css("div#sku-title > h1 ::text").extract_first().strip() - item['price'] = float( - response.css('div.price-block ::attr(data-customer-price)').extract_first(default=0) - ) - yield item diff --git a/scrapy_price_monitor/price_monitor/spiders/books_toscrape.py b/scrapy_price_monitor/price_monitor/spiders/books_toscrape.py new file mode 100644 index 0000000..aec475b --- /dev/null +++ b/scrapy_price_monitor/price_monitor/spiders/books_toscrape.py @@ -0,0 +1,15 @@ +from price_monitor.spiders._base import BaseSpider +from price_monitor.items import ProductLoader + + +class BooksSpider(BaseSpider): + name = "books.toscrape.com" + + def parse(self, response): + item = response.meta.get('item', {}) + loader = ProductLoader(item=item, response=response) + loader.add_value('url', response.url) + loader.add_css('title', 'h1::text') + loader.add_css('price', '.price_color::text') + yield loader.load_item() + diff --git a/scrapy_price_monitor/price_monitor/spiders/ebay.py b/scrapy_price_monitor/price_monitor/spiders/ebay.py deleted file mode 100644 index 7721fa6..0000000 --- a/scrapy_price_monitor/price_monitor/spiders/ebay.py +++ /dev/null @@ -1,17 +0,0 @@ -from extruct.w3cmicrodata import MicrodataExtractor -from .base_spider import BaseSpider - - -class EbaySpider(BaseSpider): - name = "ebay.com" - - def parse(self, response): - extractor = MicrodataExtractor() - properties = extractor.extract(response.body_as_unicode()).get('items')[0].get('properties', {}) - item = response.meta.get('item', {}) - item['url'] = response.url - item['title'] = properties.get('name').replace('Details about', '').strip() - item['price'] = float( - properties.get('offers', {}).get('properties', {}).get('price', 0) - ) - yield item diff --git a/scrapy_price_monitor/price_monitor/utils.py b/scrapy_price_monitor/price_monitor/utils.py index 8deb616..cc17f6a 100644 --- 
a/scrapy_price_monitor/price_monitor/utils.py +++ b/scrapy_price_monitor/price_monitor/utils.py @@ -32,4 +32,4 @@ def get_retailers_for_product(product_name): data = json.loads( pkgutil.get_data("price_monitor", "resources/urls.json").decode() ) - return {get_retailer_name_from_url(url) for url in data[product_name]} + return {get_retailer_name_from_url(url) for url in data[product_name]} \ No newline at end of file diff --git a/scrapy_price_monitor/requirements.txt b/scrapy_price_monitor/requirements.txt index 2567afb..0f278d9 100644 --- a/scrapy_price_monitor/requirements.txt +++ b/scrapy_price_monitor/requirements.txt @@ -1,5 +1,9 @@ scrapy +price-parser +scrapinghub + +# sample alert script requirements +# replace with appropriate packages if not using Amazon SES email alerts boto -extruct w3lib -jinja2 +jinja2 \ No newline at end of file diff --git a/scrapy_price_monitor/scrapinghub.yml b/scrapy_price_monitor/scrapinghub.yml index 7a8527c..43503c6 100644 --- a/scrapy_price_monitor/scrapinghub.yml +++ b/scrapy_price_monitor/scrapinghub.yml @@ -1,3 +1,4 @@ -requirements_file: requirements.txt -stacks: - default: scrapy:1.1-py3 +stack: scrapy:2.5 +version: GIT +requirements: + file: requirements.txt diff --git a/scrapy_price_monitor/scrapy.cfg b/scrapy_price_monitor/scrapy.cfg index d34a107..966b999 100644 --- a/scrapy_price_monitor/scrapy.cfg +++ b/scrapy_price_monitor/scrapy.cfg @@ -1,7 +1,7 @@ # Automatically created by: scrapy startproject # # For more information about the [deploy] section see: -# https://scrapyd.readthedocs.org/en/latest/deploy.html +# https://scrapyd.readthedocs.io/en/latest/deploy.html [settings] default = price_monitor.settings diff --git a/scrapy_price_monitor/setup.py b/scrapy_price_monitor/setup.py index 3e0698a..cc29a6e 100644 --- a/scrapy_price_monitor/setup.py +++ b/scrapy_price_monitor/setup.py @@ -3,10 +3,10 @@ from setuptools import setup, find_packages setup( - name='project', - version='1.0', - packages=find_packages(), - package_data={'price_monitor': ['resources/*.json', 'templates/*.html']}, - scripts=['bin/monitor.py'], - entry_points={'scrapy': ['settings = price_monitor.settings']}, + name = 'project', + version = '1.0', + packages = find_packages(), + package_data = {'price_monitor': ['resources/*.json', 'alert_template/*.html']}, + scripts = ['bin/monitor.py', 'bin/alert.py'], + entry_points = {'scrapy': ['settings = price_monitor.settings']}, )
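
For reference, here is a minimal sketch of what a new retailer spider could look like under the structure this patch introduces. The `fake-website.com` domain and the CSS selectors are hypothetical placeholders; the real selectors depend on the retailer's product page markup:

    # price_monitor/spiders/fake_website.py (hypothetical example)
    from price_monitor.spiders._base import BaseSpider
    from price_monitor.items import ProductLoader


    class FakeWebsiteSpider(BaseSpider):
        # the name must match the retailer domain used in resources/urls.json,
        # so BaseSpider.start_requests() can route that product's URLs here
        name = "fake-website.com"

        def parse(self, response):
            # BaseSpider attaches product_name, retailer and a timestamp via request meta
            item = response.meta.get('item', {})
            loader = ProductLoader(item=item, response=response)
            loader.add_value('url', response.url)
            # placeholder selectors -- adjust to the retailer's page
            loader.add_css('title', 'h1::text')
            loader.add_css('price', '.price::text')
            yield loader.load_item()

Because `ProductLoader` declares `price_in = MapCompose(lambda x: Price.fromstring(x).amount_float)`, the raw price string is parsed into a float before the item reaches the pipeline.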
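
Similarly, a rough sketch of driving the new `CollectionHelper` adapter from a local script, for example to inspect what the pipeline has stored. The project id, API key, key and item values below are placeholders, not values taken from this patch:

    from price_monitor.collection_helper import CollectionHelper

    # placeholders: use your own Scrapy Cloud project id and API key
    helper = CollectionHelper(
        proj_id='PROJ_ID',
        collection_name='alightintheattic',
        api_key='SHUB_KEY',
        create=True,
    )

    # write one entry; flush=True forces the batch writer to send it immediately
    helper.set('example-key', {'title': 'A Light in the Attic', 'price': 51.77}, flush=True)

    # read it back, falling back to a default when the key is missing
    print(helper.get('example-key', default={}))

    # list every key the spiders have written so far
    print(helper.list_keys())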