From 961c417efc23ca067d79ecb84026c6ec54175ecf Mon Sep 17 00:00:00 2001 From: thiswillbeyourgithub <26625900+thiswillbeyourgithub@users.noreply.github.com> Date: Sat, 26 Apr 2025 11:27:15 +0200 Subject: [PATCH] new: add crawler for mwmbl search engine Signed-off-by: thiswillbeyourgithub <26625900+thiswillbeyourgithub@users.noreply.github.com> --- README.md | 10 ++++++++++ docker-compose.yml | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/README.md b/README.md index 789fdb6..b499286 100644 --- a/README.md +++ b/README.md @@ -237,6 +237,16 @@ Notes: this one is empty by default, add some sites to archive or crawl regulary
+#### mwmbl + +The Docker image is built locally, at build time, from the official [git repository](https://github.com/mwmbl/crawler-script/). + +> [Mwmbl](https://github.com/mwmbl/mwmbl) is a non-profit, open source search engine where the community determines the rankings. We aim to be a replacement for commercial search engines such as Google and Bing. + +[https://github.com/mwmbl/mwmbl](https://github.com/mwmbl/mwmbl) + +
+ ---
diff --git a/docker-compose.yml b/docker-compose.yml index 7cac2d6..c6ba5e8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -223,6 +223,40 @@ services: - "com.centurylinklabs.watchtower.scope=goodkarmakit" + mwmbl-crawler: + # Help crawl for mwmbl.org, the open-source, non-profit web search engine + # More info: https://book.mwmbl.org/ + # Crawler source: https://github.com/mwmbl/crawler-script (shallow-cloned at image build time; base image tags are pinned) + environment: + - THREADS=1 + restart: always + build: + context: . + dockerfile_inline: | + FROM alpine:3.21 AS repo + RUN apk add --no-cache git + WORKDIR /app + RUN git clone --depth 1 https://github.com/mwmbl/crawler-script.git . + + FROM python:3.9-slim-bullseye + COPY --from=repo /app/entrypoint.sh / + RUN chmod +x /entrypoint.sh + RUN mkdir -p /srv/mwmbl/crawler-script + RUN useradd mwmbl -r -d /srv/mwmbl && \ + chown mwmbl:mwmbl -R /srv/mwmbl + USER mwmbl + WORKDIR /srv/mwmbl/crawler-script + COPY --from=repo /app/justext /srv/mwmbl/crawler-script/justext + COPY --from=repo /app/LICENSE /app/README.md /app/pyproject.toml /app/poetry.lock /app/main.py /srv/mwmbl/crawler-script/ + RUN python -m venv venv && \ + . venv/bin/activate && \ + pip install . && \ + pip cache purge + ENTRYPOINT ["/entrypoint.sh"] + + + + ### Distribued storage projects ipfs: