From 961c417efc23ca067d79ecb84026c6ec54175ecf Mon Sep 17 00:00:00 2001
From: thiswillbeyourgithub
<26625900+thiswillbeyourgithub@users.noreply.github.com>
Date: Sat, 26 Apr 2025 11:27:15 +0200
Subject: [PATCH] new: add crawler for mwmbl search engine
Signed-off-by: thiswillbeyourgithub <26625900+thiswillbeyourgithub@users.noreply.github.com>
---
README.md | 10 ++++++++++
docker-compose.yml | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/README.md b/README.md
index 789fdb6..b499286 100644
--- a/README.md
+++ b/README.md
@@ -237,6 +237,16 @@ Notes: this one is empty by default, add some sites to archive or crawl regulary
+#### mwmbl
+
+The Docker image is built from the official [git repository](https://github.com/mwmbl/crawler-script/).
+
+> [Mwmbl](https://github.com/mwmbl/mwmbl) is a non-profit, open source search engine where the community determines the rankings. We aim to be a replacement for commercial search engines such as Google and Bing.
+
+[https://github.com/mwmbl/mwmbl](https://github.com/mwmbl/mwmbl)
+
+
+
---
diff --git a/docker-compose.yml b/docker-compose.yml
index 7cac2d6..c6ba5e8 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -223,6 +223,40 @@ services:
- "com.centurylinklabs.watchtower.scope=goodkarmakit"
+ mwmbl-crawler:
+ # Helps crawl the web for mwmbl.org, the open-source, non-profit web search engine.
+ # More info: https://book.mwmbl.org/
+ # Crawler source: https://github.com/mwmbl/crawler-script (cloned at image build time by the inline Dockerfile below)
+ environment:
+ - THREADS=1  # NOTE(review): presumably the crawler's thread count; confirm in the crawler-script repo
+ restart: always  # Compose restart policy: always restart the container if it stops
+ build:  # image is built locally from the inline Dockerfile below
+ context: .
+ dockerfile_inline: |
+ FROM alpine:latest AS repo
+ RUN apk add --no-cache git
+ WORKDIR /app
+ RUN git clone https://github.com/mwmbl/crawler-script.git .
+
+ FROM python:3.9-slim-bullseye
+ COPY --from=repo /app/entrypoint.sh /
+ RUN chmod +x /entrypoint.sh
+ RUN mkdir -p /srv/mwmbl/crawler-script
+ RUN useradd mwmbl -r -d /srv/mwmbl && \
+ chown mwmbl:mwmbl -R /srv/mwmbl
+ USER mwmbl
+ WORKDIR /srv/mwmbl/crawler-script
+ COPY --from=repo /app/justext /srv/mwmbl/crawler-script/justext
+ COPY --from=repo /app/LICENSE /app/README.md /app/pyproject.toml /app/poetry.lock /app/main.py /srv/mwmbl/crawler-script/
+ RUN python -m venv venv && \
+ . venv/bin/activate && \
+ pip install . && \
+ pip cache purge
+ ENTRYPOINT ["/entrypoint.sh"]
+
+
+
+
### Distribued storage projects
ipfs: