From 91d5b5141c739fbeec9b45fbafadd455de2844f5 Mon Sep 17 00:00:00 2001
From: Jake Howard
Date: Fri, 25 Nov 2022 14:25:27 +0000
Subject: [PATCH 1/3] Add support for wildcard hosts

This uses the same implementation as `ALLOWED_HOSTS`
---
 README.md                    | 15 +++++++++++----
 simple_robots/tests/tests.py | 13 +++++++++++++
 simple_robots/views.py       | 14 +++++++++++-----
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index a192e15..6039a51 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ django-simple-robots
 
 Most web applications shouldn't be indexed by Google. This app just provides a view that serves a "deny all" robots.txt.
 
-In some cases, you do want your app to be indexed - but only in your production environment (not any staging environments). For this case, you can set `ROBOTS_ALLOW_HOST`. If the incoming hostname matches this setting, an "allow all" robots.txt will be served. Otherwise, the "deny all" will be served.
+In some cases, you do want your app to be indexed - but only in your production environment (not any staging environments). For this case, you can set `ROBOTS_ALLOW_HOSTS`. If the incoming hostname matches this setting, an "allow all" robots.txt will be served. Otherwise, the "deny all" will be served.
 
 Tested against Django 2.2, 3.2 and 4.0 on Python 3.6, 3.7, 3.8, 3.9 and 3.10
 
@@ -26,14 +26,21 @@
         # ..... other stuff
     ]
 
-Then, add `simple_robots` to `INSTALLED_APPS` in your `settings.py`
+Then, add `simple_robots` to `INSTALLED_APPS` in your `settings.py`.
 
-Optionally, set `ROBOTS_ALLOW_HOST` settings variable.
+Optionally, set the `ROBOTS_ALLOW_HOSTS` setting:
 
-    ROBOTS_ALLOW_HOST = "myproductionurl.com"
+    ROBOTS_ALLOW_HOSTS = ["myproductionurl.com"]
+
+`ROBOTS_ALLOW_HOSTS` accepts multiple entries, matched using the same rules as [`ALLOWED_HOSTS`](https://docs.djangoproject.com/en/stable/ref/settings/#allowed-hosts):
+
+    # Allow all subdomains of `myproductionurl.com` (including the apex) and exactly `myotherproductionurl.com` (no subdomains)
+    ROBOTS_ALLOW_HOSTS = [".myproductionurl.com", "myotherproductionurl.com"]
 
 That's it!
 
+Note: Previous versions used `ROBOTS_ALLOW_HOST` to specify a single allowed host. This setting is still supported for backwards compatibility.
+
 ### Customization
 
 The allow and disallow template are stored at `robots.txt` and `robots-disallow.txt` respectively. You can override these in your projects templates directory to customize the responses.
diff --git a/simple_robots/tests/tests.py b/simple_robots/tests/tests.py
index 83a527b..c13ce92 100644
--- a/simple_robots/tests/tests.py
+++ b/simple_robots/tests/tests.py
@@ -13,6 +13,19 @@ def test_allow_if_host_matches(self):
         response = self.client.get("/robots.txt", HTTP_HOST="test.com")
         self.assertEqual(response.content, b"User-agent: *\nAllow: /\n")
 
+    @override_settings(ROBOTS_ALLOW_HOST=".test.com", ALLOWED_HOSTS=[".test.com"])
+    def test_allow_if_host_matches_wildcard(self):
+        response = self.client.get("/robots.txt", HTTP_HOST="example.test.com")
+        self.assertEqual(response.content, b"User-agent: *\nAllow: /\n")
+
+    @override_settings(
+        ROBOTS_ALLOW_HOSTS=["example.test.com", "example2.test.com"],
+        ALLOWED_HOSTS=[".test.com"],
+    )
+    def test_allow_if_host_matches_multiple(self):
+        response = self.client.get("/robots.txt", HTTP_HOST="example2.test.com")
+        self.assertEqual(response.content, b"User-agent: *\nAllow: /\n")
+
     @override_settings(
         ROBOTS_ALLOW_HOST="test.com", ALLOWED_HOSTS=["test.com", "somethingelse.com"]
     )
diff --git a/simple_robots/views.py b/simple_robots/views.py
index 87c039d..bdf4930 100644
--- a/simple_robots/views.py
+++ b/simple_robots/views.py
@@ -1,7 +1,7 @@
 from django.conf import settings
+from django.http.request import validate_host
 from django.views.generic import TemplateView
 
-ROBOTS_ALLOW_HOST_SETTING = "ROBOTS_ALLOW_HOST"
 ROBOTS_ALLOW_TEMPLATE = "robots.txt"
 ROBOTS_DISALLOW_TEMPLATE = "robots-disallow.txt"
 
@@ -9,11 +9,15 @@
 class ServeRobotsView(TemplateView):
     content_type = "text/plain"
 
+    def get_allowed_hosts(self):
+        # Maintain singular setting for backwards compatibility
+        if getattr(settings, "ROBOTS_ALLOW_HOST", ""):
+            return [settings.ROBOTS_ALLOW_HOST]
+
+        return getattr(settings, "ROBOTS_ALLOW_HOSTS", [])
+
     def get_template_names(self):
-        if (
-            getattr(settings, ROBOTS_ALLOW_HOST_SETTING, None)
-            == self.request.get_host()
-        ):
+        if validate_host(self.request.get_host(), self.get_allowed_hosts()):
             return ROBOTS_ALLOW_TEMPLATE
 
         return ROBOTS_DISALLOW_TEMPLATE

From 50f89115efe79b2a93b54fed835d6a285f76510c Mon Sep 17 00:00:00 2001
From: Jake Howard
Date: Fri, 25 Nov 2022 14:26:29 +0000
Subject: [PATCH 2/3] Add syntax highlighting to readme code snippets

---
 README.md | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 6039a51..78fcdba 100644
--- a/README.md
+++ b/README.md
@@ -14,28 +14,36 @@ Tested against Django 2.2, 3.2 and 4.0 on Python 3.6, 3.7, 3.8, 3.9 and 3.10
 
 Install from PIP
 
-    pip install django-simple-robots
+```bash
+pip install django-simple-robots
+```
 
 In your root urlconf, add an entry as follows:
 
-    from django.conf.urls import url
-    from simple_robots.views import serve_robots
+```python
+from django.urls import path
+from simple_robots.views import serve_robots
 
-    urlpatterns = [
-        path("robots.txt", serve_robots),
-        # ..... other stuff
-    ]
+urlpatterns = [
+    path("robots.txt", serve_robots),
+    # ..... other stuff
+]
+```
 
 Then, add `simple_robots` to `INSTALLED_APPS` in your `settings.py`.
 
 Optionally, set the `ROBOTS_ALLOW_HOSTS` setting:
 
-    ROBOTS_ALLOW_HOSTS = ["myproductionurl.com"]
+```python
+ROBOTS_ALLOW_HOSTS = ["myproductionurl.com"]
+```
 
 `ROBOTS_ALLOW_HOSTS` accepts multiple entries, matched using the same rules as [`ALLOWED_HOSTS`](https://docs.djangoproject.com/en/stable/ref/settings/#allowed-hosts):
 
-    # Allow all subdomains of `myproductionurl.com` (including the apex) and exactly `myotherproductionurl.com` (no subdomains)
-    ROBOTS_ALLOW_HOSTS = [".myproductionurl.com", "myotherproductionurl.com"]
+```python
+# Allow all subdomains of `myproductionurl.com` (including the apex) and exactly `myotherproductionurl.com` (no subdomains)
+ROBOTS_ALLOW_HOSTS = [".myproductionurl.com", "myotherproductionurl.com"]
+```
 
 That's it!
 

From 51fe4c54f12e4f736088303e344af86ae332469f Mon Sep 17 00:00:00 2001
From: Jake Howard
Date: Fri, 25 Nov 2022 14:30:28 +0000
Subject: [PATCH 3/3] Bump black to fix CI issues

See https://github.com/psf/black/issues/2964
---
 dev-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 39b8955..c4d94f1 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,3 +1,3 @@
-black==21.11b1
+black==22.8.0
 flake8==4.0.1
 isort==5.10.1
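
A note for reviewers: `django.http.request.validate_host` is the same helper Django uses to enforce `ALLOWED_HOSTS`, so `ROBOTS_ALLOW_HOSTS` entries follow identical matching rules. Below is a minimal sketch of those rules, using the illustrative hostnames from the tests in this series; `validate_host` is a pure function, so it runs without any configured settings:

```python
from django.http.request import validate_host

# A leading dot matches the domain itself and any subdomain,
# mirroring ALLOWED_HOSTS semantics.
assert validate_host("example.test.com", [".test.com"])
assert validate_host("test.com", [".test.com"])  # the apex matches too

# Entries without a leading dot require an exact match;
# matching any one entry in the list is sufficient.
assert validate_host("example2.test.com", ["example.test.com", "example2.test.com"])
assert not validate_host("other.com", ["example.test.com", ".test.com"])
```

Because `get_allowed_hosts()` wraps the legacy `ROBOTS_ALLOW_HOST` string in a single-element list before passing it to `validate_host`, the old singular setting transparently gains the same wildcard behaviour, which is what `test_allow_if_host_matches_wildcard` exercises.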