diff --git a/.dockerignore b/.dockerignore index f201e9225..956c90ab5 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,3 +4,4 @@ _build deps .elixir_ls priv +native/philomena/target diff --git a/config/runtime.exs b/config/runtime.exs index e5c35a870..64bd8090b 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -27,6 +27,7 @@ config :philomena, image_url_root: System.fetch_env!("IMAGE_URL_ROOT"), badge_url_root: System.fetch_env!("BADGE_URL_ROOT"), mailer_address: System.fetch_env!("MAILER_ADDRESS"), + mediaproc_addr: System.fetch_env!("MEDIAPROC_ADDR"), tag_file_root: System.fetch_env!("TAG_FILE_ROOT"), site_domains: System.fetch_env!("SITE_DOMAINS"), tag_url_root: System.fetch_env!("TAG_URL_ROOT"), diff --git a/docker-compose.yml b/docker-compose.yml index 1f13cb2f7..b1b62e481 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,6 +30,7 @@ services: - IMAGE_URL_ROOT=/img - BADGE_URL_ROOT=/badge-img - TAG_URL_ROOT=/tag-img + - MEDIAPROC_ADDR=mediaproc:1500 - OPENSEARCH_URL=http://opensearch:9200 - REDIS_HOST=valkey - DATABASE_URL=ecto://postgres:postgres@postgres/philomena_dev @@ -52,6 +53,7 @@ services: - app_deps_data:/srv/philomena/deps - app_native_data:/srv/philomena/priv/native depends_on: + - mediaproc - postgres - opensearch - valkey @@ -89,6 +91,18 @@ services: - .:/srv/philomena attach: false + mediaproc: + build: + context: . + dockerfile: ./docker/mediaproc/Dockerfile + attach: false + deploy: + resources: + limits: + cpus: '4' + memory: 8gb + pids: 8192 + web: build: context: . 
diff --git a/docker/app/Dockerfile b/docker/app/Dockerfile index 69e16ab8f..4780e50b7 100644 --- a/docker/app/Dockerfile +++ b/docker/app/Dockerfile @@ -1,28 +1,12 @@ FROM elixir:1.18.1-alpine -ADD https://api.github.com/repos/philomena-dev/FFmpeg/git/refs/heads/release/6.1 /tmp/ffmpeg_version.json -RUN (echo "https://github.com/philomena-dev/prebuilt-ffmpeg/raw/master"; cat /etc/apk/repositories) > /tmp/repositories \ - && cp /tmp/repositories /etc/apk/repositories \ - && apk update --allow-untrusted \ - && apk add inotify-tools build-base git ffmpeg ffmpeg-dev npm nodejs file-dev libjpeg-turbo-dev libpng-dev gifsicle optipng libjpeg-turbo-utils librsvg rsvg-convert imagemagick postgresql16-client wget rust cargo --allow-untrusted \ +RUN apk add inotify-tools build-base git npm nodejs postgresql16-client wget rust cargo \ && mix local.hex --force \ && mix local.rebar --force -ADD https://api.github.com/repos/philomena-dev/cli_intensities/git/refs/heads/master /tmp/cli_intensities_version.json -RUN git clone --depth 1 https://github.com/philomena-dev/cli_intensities /tmp/cli_intensities \ - && cd /tmp/cli_intensities \ - && make -j$(nproc) install - -ADD https://api.github.com/repos/philomena-dev/mediatools/git/refs/heads/master /tmp/mediatools_version.json -RUN git clone --depth 1 https://github.com/philomena-dev/mediatools /tmp/mediatools \ - && ln -s /usr/lib/librsvg-2.so.2 /usr/lib/librsvg-2.so \ - && cd /tmp/mediatools \ - && make -j$(nproc) install - COPY docker/app/run-development /usr/local/bin/run-development COPY docker/app/run-test /usr/local/bin/run-test -COPY docker/app/safe-rsvg-convert /usr/local/bin/safe-rsvg-convert COPY docker/app/purge-cache /usr/local/bin/purge-cache ENV PATH=$PATH:/root/.cargo/bin EXPOSE 5173 -CMD run-development +CMD ["/usr/local/bin/run-development"] diff --git a/docker/mediaproc/Dockerfile b/docker/mediaproc/Dockerfile new file mode 100644 index 000000000..ebe094f3e --- /dev/null +++ b/docker/mediaproc/Dockerfile @@ -0,0 
+1,77 @@ +FROM rust:1.83-slim + +RUN apt update \ + && apt install -y build-essential git libmagic-dev libturbojpeg0-dev libpng-dev \ + gifsicle optipng libjpeg-turbo-progs librsvg2-bin librsvg2-dev file imagemagick \ + libx264-dev libx265-dev libvpx-dev libdav1d-dev libaom-dev libopus-dev \ + libmp3lame-dev libvorbis-dev libwebp-dev libjxl-dev yasm wget + +ADD https://api.github.com/repos/philomena-dev/FFmpeg/git/refs/heads/release/7.1 /tmp/ffmpeg_version.json +ADD https://api.github.com/repos/philomena-dev/cli_intensities/git/refs/heads/master /tmp/cli_intensities_version.json +ADD https://api.github.com/repos/philomena-dev/mediatools/git/refs/heads/master /tmp/mediatools_version.json + +RUN wget -qO /tmp/FFmpeg.tar.gz https://github.com/philomena-dev/FFmpeg/archive/refs/heads/release/7.1.tar.gz \ + && wget -qO /tmp/cli_intensities.tar.gz https://github.com/philomena-dev/cli_intensities/archive/refs/heads/master.tar.gz \ + && wget -qO /tmp/mediatools.tar.gz https://github.com/philomena-dev/mediatools/archive/refs/heads/master.tar.gz + +RUN cd /tmp \ + && tar -xf FFmpeg.tar.gz \ + && tar -xf cli_intensities.tar.gz \ + && tar -xf mediatools.tar.gz \ + && cd /tmp/FFmpeg-release-7.1 \ + && ./configure \ + --prefix=/usr \ + --disable-everything \ + --disable-stripping \ + --disable-static \ + --disable-ffplay \ + --disable-doc \ + --disable-htmlpages \ + --disable-manpages \ + --disable-podpages \ + --disable-txtpages \ + --disable-protocols \ + --enable-shared \ + --enable-pic \ + --enable-pthreads \ + --enable-gpl \ + --enable-avfilter \ + --enable-bsf=extract_extradata \ + --enable-decoder=aac,apng,av1,gif,h264,hevc,jpeg2000,jpegxl,libaom-av1,libdav1d,libvorbis,libvpx_vp8,libvpx_vp9,mp3,mjpeg,opus,png,vorbis,vp8,vp9,webvtt \ + --enable-demuxer=apng,gif,image2,image_gif_pipe,image_jpeg_pipe,image_png_pipe,image_webp_pipe,matroska,mjpeg,mjpeg_2000,mov,webm \ + 
--enable-encoder=aac,apng,gif,jpegxl,libmp3lame,libaom-av1,libvorbis,libopus,libvpx_vp8,libvpx_vp9,libx265,libx264,opus,mjpeg,png,vorbis,webvtt \ + --enable-filter=concat,palettegen,paletteuse,scale,setpts,setsar,settb,split,trim \ + --enable-libaom \ + --enable-libjxl \ + --enable-libdav1d \ + --enable-libopus \ + --enable-libmp3lame \ + --enable-libvpx \ + --enable-libvorbis \ + --enable-libx264 \ + --enable-libx265 \ + --enable-libwebp \ + --enable-muxer=apng,image2,gif,matroska,mp4,webp,webm \ + --enable-parser=aac,gif,h264,hevc,jpeg2000,jpegxl,mjpeg,opus,png,vorbis,vp8,vp9,webp \ + --enable-protocol=concat,data,file,subfile \ + && make -j$(nproc) install \ + && cd /tmp/cli_intensities-master \ + && make -j$(nproc) install \ + && cd /tmp/mediatools-master \ + && make -j$(nproc) install + +COPY native/philomena /tmp/philomena +COPY docker/mediaproc/safe-rsvg-convert /usr/bin/safe-rsvg-convert +ADD https://github.com/liamwhite/philomena-ris-inference-toolkit/releases/download/v1.0/dinov2-with-registers-base.pt /usr/share/dinov2-with-registers-base.pt + +RUN cd /tmp/philomena \ + && cargo build --release -p mediaproc_server \ + && cp target/release/mediaproc_server /usr/bin/mediaproc_server \ + && find target/release/build -regextype posix-extended -regex '^.*\.so(\.[0-9]+)*$' -exec cp '{}' /usr/lib/ ';' + +# Set up unprivileged user account +RUN useradd -ms /bin/bash mediaproc +USER mediaproc +WORKDIR /home/mediaproc +ENV RUST_LOG=trace +CMD ["/usr/bin/mediaproc_server", "0.0.0.0:1500", "/usr/share/dinov2-with-registers-base.pt"] diff --git a/docker/app/safe-rsvg-convert b/docker/mediaproc/safe-rsvg-convert similarity index 100% rename from docker/app/safe-rsvg-convert rename to docker/mediaproc/safe-rsvg-convert diff --git a/lib/philomena/duplicate_reports.ex b/lib/philomena/duplicate_reports.ex index a9cad67ba..05aa54c34 100644 --- a/lib/philomena/duplicate_reports.ex +++ b/lib/philomena/duplicate_reports.ex @@ -9,6 +9,8 @@ defmodule Philomena.DuplicateReports 
do alias Ecto.Multi alias Philomena.Repo + alias PhilomenaMedia.Features + alias PhilomenaQuery.Search alias Philomena.DuplicateReports.DuplicateReport alias Philomena.DuplicateReports.SearchQuery alias Philomena.DuplicateReports.Uploader @@ -20,7 +22,7 @@ defmodule Philomena.DuplicateReports do source = Repo.preload(source, :intensity) {source.intensity, source.image_aspect_ratio} - |> find_duplicates(dist: 0.2) + |> find_duplicates_by_intensities(dist: 0.2) |> where([i, _it], i.id != ^source.id) |> Repo.all() |> Enum.map(fn target -> @@ -30,7 +32,77 @@ defmodule Philomena.DuplicateReports do end) end - def find_duplicates({intensities, aspect_ratio}, opts \\ []) do + def find_duplicates_by_features(features = %Features{}, filter, opts \\ []) do + min_score = Keyword.get(opts, :min_score, 0) + limit = Keyword.get(opts, :limit, 25) + + # TODO: many issues with efficient filtering using k-NN plugin, + # use post_filter to work around for the time being + # + # https://github.com/opensearch-project/k-NN/issues/2222 + # https://github.com/opensearch-project/k-NN/issues/2339 + # https://github.com/opensearch-project/k-NN/issues/2347 + + query = %{ + query: %{ + nested: %{ + path: "vectors", + query: %{ + knn: %{ + "vectors.f": %{ + vector: features.features, + k: 100 + } + } + } + } + }, + post_filter: filter, + min_score: min_score + } + + images = + Image + |> Search.search_definition(query, %{page_size: limit}) + |> Search.search_records(preload(Image, [:user, :sources, tags: :aliases])) + + images + |> Map.put(:total_entries, min(images.total_entries, limit)) + |> Map.put(:total_pages, min(images.total_pages, 1)) + end + + @doc """ + Executes the reverse image search query from parameters. 
+ + ## Examples + + iex> execute_search_query_by_features(filter, %{"image" => ...}) + {:ok, [%Image{...}, ....]} + + iex> execute_search_query_by_features(filter, %{"image" => ...}) + {:error, %Ecto.Changeset{}} + + """ + def execute_search_query_by_features(filter, attrs \\ %{}) do + %SearchQuery{} + |> SearchQuery.changeset(attrs) + |> Uploader.analyze_upload(attrs) + |> Ecto.Changeset.apply_action(:create) + |> case do + {:ok, search_query} -> + images = + search_query + |> generate_features() + |> find_duplicates_by_features(filter, limit: search_query.limit) + + {:ok, images} + + error -> + error + end + end + + def find_duplicates_by_intensities({intensities, aspect_ratio}, opts \\ []) do aspect_dist = Keyword.get(opts, :aspect_dist, 0.05) limit = Keyword.get(opts, :limit, 10) dist = Keyword.get(opts, :dist, 0.25) @@ -71,7 +143,7 @@ defmodule Philomena.DuplicateReports do {:error, %Ecto.Changeset{}} """ - def execute_search_query(attrs \\ %{}) do + def execute_search_query_by_intensities(attrs \\ %{}) do %SearchQuery{} |> SearchQuery.changeset(attrs) |> Uploader.analyze_upload(attrs) @@ -85,7 +157,7 @@ defmodule Philomena.DuplicateReports do images = {intensities, aspect} - |> find_duplicates(dist: dist, aspect_dist: dist, limit: limit) + |> find_duplicates_by_intensities(dist: dist, aspect_dist: dist, limit: limit) |> preload([:user, :intensity, [:sources, tags: :aliases]]) |> Repo.paginate(page_size: 50) @@ -103,6 +175,13 @@ defmodule Philomena.DuplicateReports do PhilomenaMedia.Processors.intensities(analysis, file) end + defp generate_features(search_query) do + analysis = SearchQuery.to_analysis(search_query) + file = search_query.uploaded_image + + PhilomenaMedia.Processors.features(analysis, file) + end + @doc """ Returns an `%Ecto.Changeset{}` for tracking search query changes. 
diff --git a/lib/philomena/image_vectors.ex b/lib/philomena/image_vectors.ex
new file mode 100644
index 000000000..85268440d
--- /dev/null
+++ b/lib/philomena/image_vectors.ex
@@ -0,0 +1,100 @@
+defmodule Philomena.ImageVectors do
+  @moduledoc """
+  The ImageVectors context.
+  """
+
+  import Ecto.Query, warn: false
+  alias Philomena.Repo
+
+  alias Philomena.ImageVectors.ImageVector
+
+  @doc """
+  Gets a single image_vector.
+
+  Raises `Ecto.NoResultsError` if the Image vector does not exist.
+
+  ## Examples
+
+      iex> get_image_vector!(123)
+      %ImageVector{}
+
+      iex> get_image_vector!(456)
+      ** (Ecto.NoResultsError)
+
+  """
+  def get_image_vector!(id), do: Repo.get!(ImageVector, id)
+
+  @doc """
+  Creates an image_vector for `image` from extracted features.
+
+  `attrs` is a `PhilomenaMedia.Features` struct. It carries no `:type`, which
+  the changeset requires, so `type` (default `"full"`) is merged in before
+  validation unless `attrs` already provides one.
+
+  ## Examples
+
+      iex> create_image_vector(image, %PhilomenaMedia.Features{features: [...]})
+      {:ok, %ImageVector{}}
+
+      iex> create_image_vector(image, %PhilomenaMedia.Features{features: nil})
+      {:error, %Ecto.Changeset{}}
+
+  """
+  def create_image_vector(image, attrs \\ %PhilomenaMedia.Features{}, type \\ "full") do
+    params =
+      attrs
+      |> Map.from_struct()
+      |> Map.put_new(:type, type)
+
+    %ImageVector{image_id: image.id}
+    |> ImageVector.changeset(params)
+    |> Repo.insert()
+  end
+
+  @doc """
+  Updates an image_vector.
+
+  ## Examples
+
+      iex> update_image_vector(image_vector, %{field: new_value})
+      {:ok, %ImageVector{}}
+
+      iex> update_image_vector(image_vector, %{field: bad_value})
+      {:error, %Ecto.Changeset{}}
+
+  """
+  def update_image_vector(%ImageVector{} = image_vector, attrs) do
+    image_vector
+    |> ImageVector.changeset(attrs)
+    |> Repo.update()
+  end
+
+  @doc """
+  Deletes an image_vector.
+
+  ## Examples
+
+      iex> delete_image_vector(image_vector)
+      {:ok, %ImageVector{}}
+
+      iex> delete_image_vector(image_vector)
+      {:error, %Ecto.Changeset{}}
+
+  """
+  def delete_image_vector(%ImageVector{} = image_vector) do
+    Repo.delete(image_vector)
+  end
+
+  @doc """
+  Returns an `%Ecto.Changeset{}` for tracking image_vector changes.
+
+  ## Examples
+
+      iex> change_image_vector(image_vector)
+      %Ecto.Changeset{data: %ImageVector{}}
+
+  """
+  def change_image_vector(%ImageVector{} = image_vector, attrs \\ %{}) do
+    ImageVector.changeset(image_vector, attrs)
+  end
+end
diff --git a/lib/philomena/image_vectors/batch_processor.ex b/lib/philomena/image_vectors/batch_processor.ex
new file mode 100644
index 000000000..3fefc3189
--- /dev/null
+++ b/lib/philomena/image_vectors/batch_processor.ex
@@ -0,0 +1,107 @@
+defmodule Philomena.ImageVectors.BatchProcessor do
+  @moduledoc """
+  Batch processing interface for Philomena. See the module documentation
+  in `m:Philomena.ImageVectors.Importer` for more information about how to
+  use the functions in this module during maintenance.
+  """
+
+  alias Philomena.Images
+  alias Philomena.Images.Image
+  alias Philomena.Images.Thumbnailer
+  alias Philomena.ImageVectors.ImageVector
+  alias Philomena.Maintenance
+  alias Philomena.Repo
+
+  alias PhilomenaMedia.Analyzers
+  alias PhilomenaMedia.Processors
+  alias PhilomenaQuery.Batch
+  alias PhilomenaQuery.Search
+
+  import Ecto.Query
+
+  @doc """
+  Generates and stores feature vectors of the given `type` for every image
+  that has no `ImageVector` row, reindexing each batch after it is inserted.
+
+  `opts` is forwarded to `PhilomenaQuery.Batch.query_batches/2` and
+  `PhilomenaQuery.Search.reindex/3`. `:max_concurrency` (default 4) bounds how
+  many batches are processed at once.
+  """
+  @spec all_missing(String.t(), Keyword.t()) :: :ok
+  def all_missing(type \\ "full", opts \\ []) do
+    Image
+    |> from(as: :image)
+    |> where(not exists(where(ImageVector, [iv], iv.image_id == parent_as(:image).id)))
+    |> by_image_query(type, opts)
+  end
+
+  # Runs process_query/3 over batches of `query`, reporting progress between
+  # the lowest and highest matching image ids.
+  @spec by_image_query(Ecto.Query.t(), String.t(), Keyword.t()) :: :ok
+  defp by_image_query(query, type, opts) do
+    max_concurrency = Keyword.get(opts, :max_concurrency, 4)
+    min_image = Repo.one(limit(order_by(query, asc: :id), 1))
+    max_image = Repo.one(limit(order_by(query, desc: :id), 1))
+
+    # Repo.one returns nil when the query matches nothing (for example once
+    # every image already has a vector), so there are no bounds and no work.
+    if is_nil(min_image) or is_nil(max_image) do
+      :ok
+    else
+      query
+      |> Batch.query_batches(opts)
+      |> Task.async_stream(
+        fn query -> process_query(query, type, opts) end,
+        timeout: :infinity,
+        max_concurrency: max_concurrency
+      )
+      |> Maintenance.log_progress("BatchProcessor/#{type}", min_image.id, max_image.id)
+    end
+  end
+
+  # Computes and inserts vectors for one batch, reindexes the batch, and
+  # returns the highest image id it contained. Images whose processing raises
+  # are logged and skipped.
+  @spec process_query(Ecto.Query.t(), String.t(), Keyword.t()) :: integer()
+  defp process_query(query, type, batch_opts) do
+    images = Repo.all(query)
+    last_id = Enum.max_by(images, & &1.id).id
+
+    values =
+      Enum.flat_map(images, fn image ->
+        try do
+          [process_image(image, type)]
+        rescue
+          ex ->
+            IO.puts("While processing #{image.id}: #{inspect(ex)}")
+            IO.puts(Exception.format_stacktrace(__STACKTRACE__))
+            []
+        end
+      end)
+
+    {_count, nil} = Repo.insert_all(ImageVector, values, on_conflict: :nothing)
+
+    :ok =
+      query
+      |> preload(^Images.indexing_preloads())
+      |> Search.reindex(Image, batch_opts)
+
+    last_id
+  end
+
+  # Downloads the image file, analyzes it, and builds the row map that
+  # process_query/3 inserts into `image_vectors`.
+  @spec process_image(%Image{}, String.t()) :: map()
+  defp process_image(image = %Image{}, type) do
+    file = Thumbnailer.download_image_file(image)
+
+    {:ok, analysis} = Analyzers.analyze_path(file)
+    features = Processors.features(analysis, file)
+
+    %{
+      image_id: image.id,
+      type: type,
+      features: features.features
+    }
+  end
+end
diff --git a/lib/philomena/image_vectors/image_vector.ex b/lib/philomena/image_vectors/image_vector.ex
new file mode 100644
index 000000000..123f7015b
--- /dev/null
+++ b/lib/philomena/image_vectors/image_vector.ex
@@ -0,0 +1,19 @@
+defmodule Philomena.ImageVectors.ImageVector do
+  use Ecto.Schema
+  import Ecto.Changeset
+
+  alias Philomena.Images.Image
+
+  schema "image_vectors" do
+    belongs_to :image, Image
+    field :type, :string
+    field :features, {:array, :float}
+  end
+
+  @doc false
+  def changeset(image_vector, attrs) do
+    image_vector
+    |> cast(attrs, [:type, :features])
+    |> validate_required([:type, :features])
+  end
+end
diff --git a/lib/philomena/image_vectors/importer.ex b/lib/philomena/image_vectors/importer.ex
new file mode 100644
index 000000000..3715feb67
--- /dev/null
+++ b/lib/philomena/image_vectors/importer.ex
@@ -0,0 +1,86 @@
+defmodule Philomena.ImageVectors.Importer do
+  @moduledoc """
+  Import logic for binary files produced by the export function of
+  https://github.com/philomena-dev/philomena-ris-inference-toolkit.
+
+  Run the following commands in a long-running terminal, like screen or tmux.
+  The workflow for using the importer is as follows:
+
+  1. Use the batch inference toolkit to get the `features.bin`.
+  2. Run `philomena eval 'Philomena.ImageVectors.Importer.import_from("/path/to/features.bin")'`.
+  3. Backfill the remaining images:
+     `philomena eval 'Philomena.ImageVectors.BatchProcessor.all_missing("full", batch_size: 32)'`
+  4. During downtime, delete and recreate the images index:
+     `philomena eval 'Philomena.SearchIndexer.recreate_reindex_schema_destructive!(Philomena.Images.Image)'`.
+  """
+
+  alias Philomena.ImageVectors.ImageVector
+  alias Philomena.Maintenance
+  alias Philomena.Repo
+
+  # 4 bytes unsigned id + 768 floats per feature vector * 4 bytes per float
+  @row_size 4 + 768 * 4
+
+  @typedoc "A single feature row."
+  @type row :: %{image_id: integer(), type: String.t(), features: [float()]}
+
+  @spec import_from(Path.t()) :: :ok
+  def import_from(batch_inference_file, type \\ "full", max_concurrency \\ 4) do
+    {min, max} = get_min_and_max_id(batch_inference_file, type)
+
+    batch_inference_file
+    |> File.stream!(@row_size)
+    |> Stream.chunk_every(1024)
+    |> Task.async_stream(
+      &process_chunk(&1, type),
+      timeout: :infinity,
+      max_concurrency: max_concurrency
+    )
+    |> Maintenance.log_progress("Importer/#{type}", min, max)
+  end
+
+  # Inserts one chunk of rows and returns the highest image id in the chunk.
+  @spec process_chunk([binary()], String.t()) :: integer()
+  defp process_chunk(chunk, type) do
+    data = Enum.map(chunk, &unpack(&1, type))
+    last_id = Enum.max_by(data, & &1.image_id).image_id
+
+    {_count, nil} = Repo.insert_all(ImageVector, data, on_conflict: :nothing)
+
+    last_id
+  end
+
+  # Decodes one row: a 32-bit unsigned image id followed by 768 32-bit floats
+  # (3072 bytes), matching @row_size. NOTE(review): little-endian byte order is
+  # assumed for both fields - confirm against the toolkit's export format.
+  @spec unpack(binary(), String.t()) :: row()
+  defp unpack(row, type) do
+    <<image_id::little-unsigned-integer-size(32), packed::binary-size(3072)>> = row
+    features = for <<v::little-float-size(32) <- packed>>, do: v
+
+    %{
+      image_id: image_id,
+      type: type,
+      features: features
+    }
+  end
+
+  @spec get_min_and_max_id(Path.t(), String.t()) :: {integer(), integer()}
+  defp get_min_and_max_id(path, type) do
+    stat = File.stat!(path)
+    last_row =
stat.size - @row_size + + %{image_id: min} = get_single_row(path, 0, type) + %{image_id: max} = get_single_row(path, last_row, type) + + {min, max} + end + + @spec get_single_row(Path.t(), integer(), String.t()) :: row() + defp get_single_row(path, offset, type) do + path + |> File.stream!(@row_size, read_offset: offset) + |> Enum.at(0) + |> unpack(type) + end +end diff --git a/lib/philomena/images.ex b/lib/philomena/images.ex index af0ef79f7..9c615bdef 100644 --- a/lib/philomena/images.ex +++ b/lib/philomena/images.ex @@ -858,6 +858,7 @@ defmodule Philomena.Images do [ :gallery_interactions, + :vectors, sources: sources_query, user: user_query, favers: user_query, diff --git a/lib/philomena/images/image.ex b/lib/philomena/images/image.ex index e02356dd8..bb7bda375 100644 --- a/lib/philomena/images/image.ex +++ b/lib/philomena/images/image.ex @@ -7,6 +7,7 @@ defmodule Philomena.Images.Image do alias Philomena.ImageVotes.ImageVote alias Philomena.ImageFaves.ImageFave alias Philomena.ImageHides.ImageHide + alias Philomena.ImageVectors.ImageVector alias Philomena.Images.Source alias Philomena.Images.Subscription alias Philomena.Users.User @@ -35,6 +36,7 @@ defmodule Philomena.Images.Image do has_many :subscriptions, Subscription has_many :source_changes, SourceChange, on_replace: :delete has_many :tag_changes, TagChange + has_many :vectors, ImageVector has_many :upvoters, through: [:upvotes, :user] has_many :downvoters, through: [:downvotes, :user] has_many :favers, through: [:faves, :user] diff --git a/lib/philomena/images/search_index.ex b/lib/philomena/images/search_index.ex index 35241ccde..55f06a127 100644 --- a/lib/philomena/images/search_index.ex +++ b/lib/philomena/images/search_index.ex @@ -11,6 +11,7 @@ defmodule Philomena.Images.SearchIndex do %{ settings: %{ index: %{ + knn: true, number_of_shards: 5, max_result_window: 10_000_000 } @@ -89,6 +90,26 @@ defmodule Philomena.Images.SearchIndex do namespace: %{type: "keyword"} } }, + vectors: %{ + type: 
"nested", + properties: %{ + f: %{ + type: "knn_vector", + dimension: 768, + data_type: "float", + mode: "on_disk", + method: %{ + name: "hnsw", + engine: "faiss", + space_type: "l2", + parameters: %{ + ef_construction: 128, + m: 16 + } + } + } + } + }, approved: %{type: "boolean"}, error_tag_count: %{type: "integer"}, rating_tag_count: %{type: "integer"}, @@ -160,6 +181,7 @@ defmodule Philomena.Images.SearchIndex do }, gallery_id: Enum.map(image.gallery_interactions, & &1.gallery_id), gallery_position: Map.new(image.gallery_interactions, &{&1.gallery_id, &1.position}), + vectors: image.vectors |> Enum.map(&%{f: &1.features}), favourited_by_users: image.favers |> Enum.map(&String.downcase(&1.name)), hidden_by_users: image.hiders |> Enum.map(&String.downcase(&1.name)), upvoters: image.upvoters |> Enum.map(&String.downcase(&1.name)), diff --git a/lib/philomena/images/thumbnailer.ex b/lib/philomena/images/thumbnailer.ex index b8be742ba..e463688d9 100644 --- a/lib/philomena/images/thumbnailer.ex +++ b/lib/philomena/images/thumbnailer.ex @@ -12,6 +12,7 @@ defmodule Philomena.Images.Thumbnailer do alias Philomena.DuplicateReports alias Philomena.ImageIntensities alias Philomena.ImagePurgeWorker + alias Philomena.ImageVectors alias Philomena.Images.Image alias Philomena.Repo @@ -105,6 +106,9 @@ defmodule Philomena.Images.Thumbnailer do defp apply_change(image, {:intensities, intensities}), do: ImageIntensities.create_image_intensity(image, intensities) + defp apply_change(image, {:features, features}), + do: ImageVectors.create_image_vector(image, features) + defp apply_change(image, {:replace_original, new_file}) do full = "full.#{image.image_format}" upload_file(image, new_file, full) @@ -139,7 +143,7 @@ defmodule Philomena.Images.Thumbnailer do |> Repo.update!() end - defp download_image_file(image) do + def download_image_file(image) do tempfile = Briefly.create!(extname: ".#{image.image_format}") path = Path.join(image_thumb_prefix(image), 
"full.#{image.image_format}") diff --git a/lib/philomena/native.ex b/lib/philomena/native.ex index 14eeaa17f..2d798fcd9 100644 --- a/lib/philomena/native.ex +++ b/lib/philomena/native.ex @@ -12,6 +12,14 @@ defmodule Philomena.Native do @spec camo_image_url(String.t()) :: String.t() def camo_image_url(_uri), do: :erlang.nif_error(:nif_not_loaded) + @spec async_get_features(String.t(), String.t()) :: :ok + def async_get_features(_server_addr, _path), + do: :erlang.nif_error(:nif_not_loaded) + + @spec async_process_command(String.t(), String.t(), [String.t()]) :: :ok + def async_process_command(_server_addr, _program, _arguments), + do: :erlang.nif_error(:nif_not_loaded) + @spec zip_open_writer(Path.t()) :: {:ok, reference()} | {:error, atom()} def zip_open_writer(_path), do: :erlang.nif_error(:nif_not_loaded) diff --git a/lib/philomena_media/analyzers/gif.ex b/lib/philomena_media/analyzers/gif.ex index 982d7a319..2c1365d0e 100644 --- a/lib/philomena_media/analyzers/gif.ex +++ b/lib/philomena_media/analyzers/gif.ex @@ -3,6 +3,7 @@ defmodule PhilomenaMedia.Analyzers.Gif do alias PhilomenaMedia.Analyzers.Analyzer alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote @behaviour Analyzer @@ -20,7 +21,7 @@ defmodule PhilomenaMedia.Analyzers.Gif do end defp stats(file) do - case System.cmd("mediastat", [file]) do + case Remote.cmd("mediastat", [file]) do {output, 0} -> [_size, frames, width, height, num, den] = output diff --git a/lib/philomena_media/analyzers/jpeg.ex b/lib/philomena_media/analyzers/jpeg.ex index 60b29e04f..cca1d8810 100644 --- a/lib/philomena_media/analyzers/jpeg.ex +++ b/lib/philomena_media/analyzers/jpeg.ex @@ -3,6 +3,7 @@ defmodule PhilomenaMedia.Analyzers.Jpeg do alias PhilomenaMedia.Analyzers.Analyzer alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote @behaviour Analyzer @@ -20,7 +21,7 @@ defmodule PhilomenaMedia.Analyzers.Jpeg do end defp stats(file) do - case System.cmd("mediastat", [file]) do + case 
Remote.cmd("mediastat", [file]) do {output, 0} -> [_size, _frames, width, height, num, den] = output diff --git a/lib/philomena_media/analyzers/png.ex b/lib/philomena_media/analyzers/png.ex index 83cb506f4..a01d68abf 100644 --- a/lib/philomena_media/analyzers/png.ex +++ b/lib/philomena_media/analyzers/png.ex @@ -3,6 +3,7 @@ defmodule PhilomenaMedia.Analyzers.Png do alias PhilomenaMedia.Analyzers.Analyzer alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote @behaviour Analyzer @@ -20,7 +21,7 @@ defmodule PhilomenaMedia.Analyzers.Png do end defp stats(file) do - case System.cmd("mediastat", [file]) do + case Remote.cmd("mediastat", [file]) do {output, 0} -> [_size, frames, width, height, num, den] = output diff --git a/lib/philomena_media/analyzers/svg.ex b/lib/philomena_media/analyzers/svg.ex index f83a55f00..0b55a5681 100644 --- a/lib/philomena_media/analyzers/svg.ex +++ b/lib/philomena_media/analyzers/svg.ex @@ -3,6 +3,7 @@ defmodule PhilomenaMedia.Analyzers.Svg do alias PhilomenaMedia.Analyzers.Analyzer alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote @behaviour Analyzer @@ -20,7 +21,7 @@ defmodule PhilomenaMedia.Analyzers.Svg do end defp stats(file) do - case System.cmd("svgstat", [file]) do + case Remote.cmd("svgstat", [file]) do {output, 0} -> [_size, _frames, width, height, _num, _den] = output diff --git a/lib/philomena_media/analyzers/webm.ex b/lib/philomena_media/analyzers/webm.ex index b215e01e1..fb785923e 100644 --- a/lib/philomena_media/analyzers/webm.ex +++ b/lib/philomena_media/analyzers/webm.ex @@ -3,6 +3,7 @@ defmodule PhilomenaMedia.Analyzers.Webm do alias PhilomenaMedia.Analyzers.Analyzer alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote @behaviour Analyzer @@ -20,7 +21,7 @@ defmodule PhilomenaMedia.Analyzers.Webm do end defp stats(file) do - case System.cmd("mediastat", [file]) do + case Remote.cmd("mediastat", [file]) do {output, 0} -> [_size, frames, width, height, num, den] = output diff 
--git a/lib/philomena_media/features.ex b/lib/philomena_media/features.ex new file mode 100644 index 000000000..bb75472c1 --- /dev/null +++ b/lib/philomena_media/features.ex @@ -0,0 +1,51 @@ +defmodule PhilomenaMedia.Features do + @moduledoc """ + Features are a set of 768 weighted classification outputs produced from a + vision transformer (ViT). The individual classifications are arbitrary and + not meaningful to analyze, but the vectors can be used to compare similarity + between images using the cosine similarity measurement. + + Since cosine similarity is not a metric, the feature extractor substitutes L2 + distance over normalized vectors for it: every vector that it returns is + normalized, so traversing the k nearest neighbors in a vector space index + iterates vectors in the same order as their cosine similarity. + """ + + alias PhilomenaMedia.Remote + + @type t :: %__MODULE__{ + features: [float()] + } + + defstruct [:features] + + @doc """ + Gets the features of the given image file. + + The image file must be in the PNG or JPEG format. + + > #### Info {: .info} + > + > Clients should prefer to use `PhilomenaMedia.Processors.features/2`, as it handles + > media files of any type supported by this library, not just PNG or JPEG. + + ## Examples + + iex> Features.file("image.png") + {:ok, %Features{features: [0.03156396001577377, -0.04559657722711563, ...]}} + + iex> Features.file("nonexistent.jpg") + :error + + """ + @spec file(Path.t()) :: {:ok, t()} | :error + def file(input) do + case Remote.get_features(input) do + {:ok, features} -> + {:ok, %__MODULE__{features: features}} + + _error -> + :error + end + end +end diff --git a/lib/philomena_media/gif_preview.ex b/lib/philomena_media/gif_preview.ex index ac40044b4..beef1e781 100644 --- a/lib/philomena_media/gif_preview.ex +++ b/lib/philomena_media/gif_preview.ex @@ -3,6 +3,8 @@ defmodule PhilomenaMedia.GifPreview do GIF preview generation for video files. 
""" + alias PhilomenaMedia.Remote + @type duration :: float() @type dimensions :: {pos_integer(), pos_integer()} @@ -49,7 +51,7 @@ defmodule PhilomenaMedia.GifPreview do end) {_output, 0} = - System.cmd( + Remote.cmd( "ffmpeg", commands(decoder, video, gif, clamp(duration), dimensions, num_images, target_framerate) ) diff --git a/lib/philomena_media/intensities.ex b/lib/philomena_media/intensities.ex index ea0952952..f2df677b6 100644 --- a/lib/philomena_media/intensities.ex +++ b/lib/philomena_media/intensities.ex @@ -17,6 +17,8 @@ defmodule PhilomenaMedia.Intensities do of image dimensions, with poor precision and a poor-to-fair accuracy. """ + alias PhilomenaMedia.Remote + @type t :: %__MODULE__{ nw: float(), ne: float(), @@ -50,7 +52,7 @@ defmodule PhilomenaMedia.Intensities do """ @spec file(Path.t()) :: {:ok, t()} | :error def file(input) do - System.cmd("image-intensities", [input]) + Remote.cmd("image-intensities", [input]) |> case do {output, 0} -> [nw, ne, sw, se] = diff --git a/lib/philomena_media/mime.ex b/lib/philomena_media/mime.ex index 1b29aa759..3bd483050 100644 --- a/lib/philomena_media/mime.ex +++ b/lib/philomena_media/mime.ex @@ -27,7 +27,7 @@ defmodule PhilomenaMedia.Mime do """ @spec file(Path.t()) :: {:ok, t()} | {:unsupported_mime, t()} | :error def file(path) do - System.cmd("file", ["-b", "--mime-type", path]) + PhilomenaMedia.Remote.cmd("file", ["-b", "--mime-type", path]) |> case do {output, 0} -> true_mime(String.trim(output)) diff --git a/lib/philomena_media/processors.ex b/lib/philomena_media/processors.ex index b23ba0054..492400be6 100644 --- a/lib/philomena_media/processors.ex +++ b/lib/philomena_media/processors.ex @@ -58,6 +58,7 @@ defmodule PhilomenaMedia.Processors do """ alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Processors.{Gif, Jpeg, Png, Svg, Webm} alias PhilomenaMedia.Mime @@ -185,6 +186,25 @@ defmodule PhilomenaMedia.Processors do 
processor(analysis.mime_type).post_process(analysis, file) end + @doc """ + Takes an analyzer result and file path and runs the appropriate processor's `features/2`, + returning the feature vector. + + This allows for generating feature vectors for file types that are not directly supported by + `m:PhilomenaMedia.Features`, and should be the preferred function to call when feature vectors + are needed. + + ## Example + + iex> PhilomenaMedia.Processors.features(%Result{...}, "video.webm") + %Features{features: [0.03156396001577377, -0.04559657722711563, ...]} + + """ + @spec features(Result.t(), Path.t()) :: Features.t() + def features(analysis, file) do + processor(analysis.mime_type).features(analysis, file) + end + @doc """ Takes an analyzer result and file path and runs the appropriate processor's `intensities/2`, returning the corner intensities. @@ -195,8 +215,8 @@ defmodule PhilomenaMedia.Processors do ## Example - iex> PhilomenaMedia.Processors.intensities(%Result{...}, "video.webm") - %Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064} + iex> PhilomenaMedia.Processors.intensities(%Result{...}, "video.webm") + %Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064} """ @spec intensities(Result.t(), Path.t()) :: Intensities.t() diff --git a/lib/philomena_media/processors/gif.ex b/lib/philomena_media/processors/gif.ex index 6e185f9fb..11391aec2 100644 --- a/lib/philomena_media/processors/gif.ex +++ b/lib/philomena_media/processors/gif.ex @@ -1,8 +1,10 @@ defmodule PhilomenaMedia.Processors.Gif do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote alias PhilomenaMedia.Processors.Processor alias PhilomenaMedia.Processors @@ -22,12 +24,14 @@ defmodule PhilomenaMedia.Processors.Gif do palette = palette(file) {:ok, intensities} = Intensities.file(preview) + {:ok, features} = Features.file(preview) scaled = 
Enum.flat_map(versions, &scale(palette, file, &1)) videos = generate_videos(file) [ intensities: intensities, + features: features, thumbnails: scaled ++ videos ++ [{:copy, preview, "rendered.png"}] ] end @@ -37,6 +41,12 @@ defmodule PhilomenaMedia.Processors.Gif do [replace_original: optimize(file)] end + @spec features(Result.t(), Path.t()) :: Features.t() + def features(analysis, file) do + {:ok, features} = Features.file(preview(analysis.duration, file)) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(analysis, file) do {:ok, intensities} = Intensities.file(preview(analysis.duration, file)) @@ -46,7 +56,7 @@ defmodule PhilomenaMedia.Processors.Gif do defp optimize(file) do optimized = Briefly.create!(extname: ".gif") - {_output, 0} = System.cmd("gifsicle", ["--careful", "-O2", file, "-o", optimized]) + {_output, 0} = Remote.cmd("gifsicle", ["--careful", "-O2", file, "-o", optimized]) optimized end @@ -54,7 +64,7 @@ defmodule PhilomenaMedia.Processors.Gif do defp preview(duration, file) do preview = Briefly.create!(extname: ".png") - {_output, 0} = System.cmd("mediathumb", [file, to_string(duration / 2), preview]) + {_output, 0} = Remote.cmd("mediathumb", [file, to_string(duration / 2), preview]) preview end @@ -63,7 +73,7 @@ defmodule PhilomenaMedia.Processors.Gif do palette = Briefly.create!(extname: ".png") {_output, 0} = - System.cmd("ffmpeg", [ + Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", @@ -88,7 +98,7 @@ defmodule PhilomenaMedia.Processors.Gif do filter_graph = "[0:v]#{scale_filter}[x];[x][1:v]#{palette_filter}" {_output, 0} = - System.cmd("ffmpeg", [ + Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", @@ -109,7 +119,7 @@ defmodule PhilomenaMedia.Processors.Gif do mp4 = Briefly.create!(extname: ".mp4") {_output, 0} = - System.cmd("ffmpeg", [ + Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", @@ -127,7 +137,7 @@ defmodule PhilomenaMedia.Processors.Gif do ]) {_output, 0} = - System.cmd("ffmpeg", [ + 
Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", diff --git a/lib/philomena_media/processors/jpeg.ex b/lib/philomena_media/processors/jpeg.ex index 604442571..776924c09 100644 --- a/lib/philomena_media/processors/jpeg.ex +++ b/lib/philomena_media/processors/jpeg.ex @@ -1,8 +1,10 @@ defmodule PhilomenaMedia.Processors.Jpeg do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote alias PhilomenaMedia.Processors.Processor alias PhilomenaMedia.Processors @@ -21,12 +23,14 @@ defmodule PhilomenaMedia.Processors.Jpeg do stripped = optimize(strip(file)) {:ok, intensities} = Intensities.file(stripped) + {:ok, features} = Features.file(stripped) scaled = Enum.flat_map(versions, &scale(stripped, &1)) [ replace_original: stripped, intensities: intensities, + features: features, thumbnails: scaled ] end @@ -34,6 +38,12 @@ defmodule PhilomenaMedia.Processors.Jpeg do @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() def post_process(_analysis, _file), do: [] + @spec features(Result.t(), Path.t()) :: Features.t() + def features(_analysis, file) do + {:ok, features} = Features.file(file) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(file) @@ -42,7 +52,7 @@ defmodule PhilomenaMedia.Processors.Jpeg do defp requires_lossy_transformation?(file) do with {output, 0} <- - System.cmd("identify", ["-format", "%[orientation]\t%[profile:icc]", file]), + Remote.cmd("identify", ["-format", "%[orientation]\t%[profile:icc]", file]), [orientation, profile] <- String.split(output, "\t") do orientation not in ["Undefined", "TopLeft"] or profile != "" else @@ -60,7 +70,7 @@ defmodule PhilomenaMedia.Processors.Jpeg do true -> # Transcode: strip EXIF, embedded profile and reorient image {_output, 0} = - System.cmd("convert", [ + Remote.cmd("convert", [ file, "-profile", srgb_profile(), 
@@ -71,7 +81,7 @@ defmodule PhilomenaMedia.Processors.Jpeg do _ -> # Transmux only: Strip EXIF without touching orientation - validate_return(System.cmd("jpegtran", ["-copy", "none", "-outfile", stripped, file])) + validate_return(Remote.cmd("jpegtran", ["-copy", "none", "-outfile", stripped, file])) end stripped @@ -80,7 +90,7 @@ defmodule PhilomenaMedia.Processors.Jpeg do defp optimize(file) do optimized = Briefly.create!(extname: ".jpg") - validate_return(System.cmd("jpegtran", ["-optimize", "-outfile", optimized, file])) + validate_return(Remote.cmd("jpegtran", ["-optimize", "-outfile", optimized, file])) optimized end @@ -90,7 +100,7 @@ defmodule PhilomenaMedia.Processors.Jpeg do scale_filter = "scale=w=#{width}:h=#{height}:force_original_aspect_ratio=decrease" {_output, 0} = - System.cmd("ffmpeg", [ + Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", @@ -103,7 +113,7 @@ defmodule PhilomenaMedia.Processors.Jpeg do scaled ]) - {_output, 0} = System.cmd("jpegtran", ["-optimize", "-outfile", scaled, scaled]) + {_output, 0} = Remote.cmd("jpegtran", ["-optimize", "-outfile", scaled, scaled]) [{:copy, scaled, "#{thumb_name}.jpg"}] end diff --git a/lib/philomena_media/processors/png.ex b/lib/philomena_media/processors/png.ex index 0fc4c50d3..79f50aa0d 100644 --- a/lib/philomena_media/processors/png.ex +++ b/lib/philomena_media/processors/png.ex @@ -1,8 +1,10 @@ defmodule PhilomenaMedia.Processors.Png do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote alias PhilomenaMedia.Processors.Processor alias PhilomenaMedia.Processors @@ -18,11 +20,13 @@ defmodule PhilomenaMedia.Processors.Png do animated? = analysis.animated? 
{:ok, intensities} = Intensities.file(file) + {:ok, features} = Features.file(file) scaled = Enum.flat_map(versions, &scale(file, animated?, &1)) [ intensities: intensities, + features: features, thumbnails: scaled ] end @@ -37,6 +41,12 @@ defmodule PhilomenaMedia.Processors.Png do end end + @spec features(Result.t(), Path.t()) :: Features.t() + def features(_analysis, file) do + {:ok, features} = Features.file(file) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(file) @@ -49,7 +59,7 @@ defmodule PhilomenaMedia.Processors.Png do optimized = Briefly.create!(extname: ".png") {_output, 0} = - System.cmd("optipng", ["-fix", "-i0", "-o2", "-quiet", "-clobber", file, "-out", optimized]) + Remote.cmd("optipng", ["-fix", "-i0", "-o2", "-quiet", "-clobber", file, "-out", optimized]) # Remove useless .bak file File.rm(optimized <> ".bak") @@ -66,7 +76,7 @@ defmodule PhilomenaMedia.Processors.Png do {_output, 0} = cond do animated? 
-> - System.cmd("ffmpeg", [ + Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", @@ -82,10 +92,10 @@ defmodule PhilomenaMedia.Processors.Png do ]) true -> - System.cmd("ffmpeg", ["-loglevel", "0", "-y", "-i", file, "-vf", scale_filter, scaled]) + Remote.cmd("ffmpeg", ["-loglevel", "0", "-y", "-i", file, "-vf", scale_filter, scaled]) end - System.cmd("optipng", ["-i0", "-o1", "-quiet", "-clobber", scaled]) + Remote.cmd("optipng", ["-i0", "-o1", "-quiet", "-clobber", scaled]) [{:copy, scaled, "#{thumb_name}.png"}] end diff --git a/lib/philomena_media/processors/processor.ex b/lib/philomena_media/processors/processor.ex index 8b9f568f3..368d2d320 100644 --- a/lib/philomena_media/processors/processor.ex +++ b/lib/philomena_media/processors/processor.ex @@ -2,6 +2,7 @@ defmodule PhilomenaMedia.Processors.Processor do @moduledoc false alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Features alias PhilomenaMedia.Processors alias PhilomenaMedia.Intensities @@ -22,6 +23,11 @@ defmodule PhilomenaMedia.Processors.Processor do """ @callback post_process(Result.t(), Path.t()) :: Processors.edit_script() + @doc """ + Generate a feature vector for the given path. + """ + @callback features(Result.t(), Path.t()) :: Features.t() + @doc """ Generate corner intensities for the given path. 
""" diff --git a/lib/philomena_media/processors/svg.ex b/lib/philomena_media/processors/svg.ex index aaa3dd5ca..8a6140d95 100644 --- a/lib/philomena_media/processors/svg.ex +++ b/lib/philomena_media/processors/svg.ex @@ -1,8 +1,10 @@ defmodule PhilomenaMedia.Processors.Svg do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote alias PhilomenaMedia.Processors.Processor alias PhilomenaMedia.Processors @@ -20,12 +22,14 @@ defmodule PhilomenaMedia.Processors.Svg do preview = preview(file) {:ok, intensities} = Intensities.file(preview) + {:ok, features} = Features.file(preview) scaled = Enum.flat_map(versions, &scale(preview, &1)) full = [{:copy, preview, "full.png"}] [ intensities: intensities, + features: features, thumbnails: scaled ++ full ++ [{:copy, preview, "rendered.png"}] ] end @@ -33,6 +37,12 @@ defmodule PhilomenaMedia.Processors.Svg do @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() def post_process(_analysis, _file), do: [] + @spec features(Result.t(), Path.t()) :: Features.t() + def features(_analysis, file) do + {:ok, features} = Features.file(preview(file)) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(_analysis, file) do {:ok, intensities} = Intensities.file(preview(file)) @@ -42,7 +52,7 @@ defmodule PhilomenaMedia.Processors.Svg do defp preview(file) do preview = Briefly.create!(extname: ".png") - {_output, 0} = System.cmd("safe-rsvg-convert", [file, preview]) + {_output, 0} = Remote.cmd("safe-rsvg-convert", [file, preview]) preview end @@ -52,9 +62,9 @@ defmodule PhilomenaMedia.Processors.Svg do scale_filter = "scale=w=#{width}:h=#{height}:force_original_aspect_ratio=decrease" {_output, 0} = - System.cmd("ffmpeg", ["-loglevel", "0", "-y", "-i", preview, "-vf", scale_filter, scaled]) + Remote.cmd("ffmpeg", ["-loglevel", "0", "-y", "-i", preview, "-vf", scale_filter, scaled]) - {_output, 
0} = System.cmd("optipng", ["-i0", "-o1", "-quiet", "-clobber", scaled]) + {_output, 0} = Remote.cmd("optipng", ["-i0", "-o1", "-quiet", "-clobber", scaled]) [{:copy, scaled, "#{thumb_name}.png"}] end diff --git a/lib/philomena_media/processors/webm.ex b/lib/philomena_media/processors/webm.ex index 0fcc11296..863a7dd6b 100644 --- a/lib/philomena_media/processors/webm.ex +++ b/lib/philomena_media/processors/webm.ex @@ -1,8 +1,10 @@ defmodule PhilomenaMedia.Processors.Webm do @moduledoc false + alias PhilomenaMedia.Features alias PhilomenaMedia.Intensities alias PhilomenaMedia.Analyzers.Result + alias PhilomenaMedia.Remote alias PhilomenaMedia.GifPreview alias PhilomenaMedia.Processors.Processor alias PhilomenaMedia.Processors @@ -33,6 +35,7 @@ defmodule PhilomenaMedia.Processors.Webm do mp4 = scale_mp4_only(decoder, stripped, dimensions, dimensions) {:ok, intensities} = Intensities.file(preview) + {:ok, features} = Features.file(preview) scaled = Enum.flat_map(versions, &scale(decoder, stripped, duration, dimensions, &1)) mp4 = [{:copy, mp4, "full.mp4"}] @@ -40,6 +43,7 @@ defmodule PhilomenaMedia.Processors.Webm do [ replace_original: stripped, intensities: intensities, + features: features, thumbnails: scaled ++ mp4 ++ [{:copy, preview, "rendered.png"}] ] end @@ -47,6 +51,12 @@ defmodule PhilomenaMedia.Processors.Webm do @spec post_process(Result.t(), Path.t()) :: Processors.edit_script() def post_process(_analysis, _file), do: [] + @spec features(Result.t(), Path.t()) :: Features.t() + def features(analysis, file) do + {:ok, features} = Features.file(preview(analysis.duration, file)) + features + end + @spec intensities(Result.t(), Path.t()) :: Intensities.t() def intensities(analysis, file) do {:ok, intensities} = Intensities.file(preview(analysis.duration, file)) @@ -56,7 +66,7 @@ defmodule PhilomenaMedia.Processors.Webm do defp preview(duration, file) do preview = Briefly.create!(extname: ".png") - {_output, 0} = System.cmd("mediathumb", [file, 
to_string(duration / 2), preview]) + {_output, 0} = Remote.cmd("mediathumb", [file, to_string(duration / 2), preview]) preview end @@ -65,7 +75,7 @@ defmodule PhilomenaMedia.Processors.Webm do stripped = Briefly.create!(extname: ".webm") {_output, 0} = - System.cmd("ffmpeg", [ + Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", @@ -110,7 +120,7 @@ defmodule PhilomenaMedia.Processors.Webm do mp4 = Briefly.create!(extname: ".mp4") {_output, 0} = - System.cmd("ffmpeg", [ + Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", @@ -170,7 +180,7 @@ defmodule PhilomenaMedia.Processors.Webm do mp4 = Briefly.create!(extname: ".mp4") {_output, 0} = - System.cmd("ffmpeg", [ + Remote.cmd("ffmpeg", [ "-loglevel", "0", "-y", @@ -213,7 +223,7 @@ defmodule PhilomenaMedia.Processors.Webm do defp select_decoder(file) do {output, 0} = - System.cmd("ffprobe", [ + Remote.cmd("ffprobe", [ "-loglevel", "0", "-select_streams", diff --git a/lib/philomena_media/remote.ex b/lib/philomena_media/remote.ex new file mode 100644 index 000000000..8ff27fc32 --- /dev/null +++ b/lib/philomena_media/remote.ex @@ -0,0 +1,30 @@ +defmodule PhilomenaMedia.Remote do + @doc """ + Out-of-process replacement for `System.cmd/2` that calls the requested + command elsewhere, translating file accesses, and returns the result. + """ + def cmd(command, args) do + :ok = Philomena.Native.async_process_command(mediaproc_addr(), command, args) + + receive do + {:process_command_reply, command_reply} -> + {command_reply.stdout, command_reply.status} + end + end + + @doc """ + Gets a feature vector for the given image path to use in reverse image search. 
+ """ + def get_features(path) do + :ok = Philomena.Native.async_get_features(mediaproc_addr(), path) + + receive do + {:get_features_reply, get_features_reply} -> + get_features_reply + end + end + + defp mediaproc_addr do + Application.get_env(:philomena, :mediaproc_addr) + end +end diff --git a/lib/philomena_web/controllers/api/json/search/reverse_controller.ex b/lib/philomena_web/controllers/api/json/search/reverse_controller.ex index 4abe75602..1345d9f19 100644 --- a/lib/philomena_web/controllers/api/json/search/reverse_controller.ex +++ b/lib/philomena_web/controllers/api/json/search/reverse_controller.ex @@ -1,6 +1,7 @@ defmodule PhilomenaWeb.Api.Json.Search.ReverseController do use PhilomenaWeb, :controller + alias PhilomenaWeb.ImageLoader alias Philomena.DuplicateReports alias Philomena.Interactions @@ -9,12 +10,12 @@ defmodule PhilomenaWeb.Api.Json.Search.ReverseController do def create(conn, %{"image" => image_params}) do user = conn.assigns.current_user + image_params = Map.put(image_params, "limit", conn.params["limit"]) {images, total} = - image_params - |> Map.put("distance", conn.params["distance"]) - |> Map.put("limit", conn.params["limit"]) - |> DuplicateReports.execute_search_query() + conn + |> ImageLoader.reverse_filter() + |> DuplicateReports.execute_search_query_by_features(image_params) |> case do {:ok, images} -> {images, images.total_entries} diff --git a/lib/philomena_web/controllers/search/reverse_controller.ex b/lib/philomena_web/controllers/search/reverse_controller.ex index 0938642ab..a3e803ab9 100644 --- a/lib/philomena_web/controllers/search/reverse_controller.ex +++ b/lib/philomena_web/controllers/search/reverse_controller.ex @@ -1,6 +1,7 @@ defmodule PhilomenaWeb.Search.ReverseController do use PhilomenaWeb, :controller + alias PhilomenaWeb.ImageLoader alias Philomena.DuplicateReports.SearchQuery alias Philomena.DuplicateReports alias Philomena.Interactions @@ -14,7 +15,10 @@ defmodule PhilomenaWeb.Search.ReverseController do def 
create(conn, %{"image" => image_params}) when is_map(image_params) and image_params != %{} do - case DuplicateReports.execute_search_query(image_params) do + conn + |> ImageLoader.reverse_filter() + |> DuplicateReports.execute_search_query_by_features(image_params) + |> case do {:ok, images} -> changeset = DuplicateReports.change_search_query(%SearchQuery{}) interactions = Interactions.user_interactions(images, conn.assigns.current_user) diff --git a/lib/philomena_web/image_loader.ex b/lib/philomena_web/image_loader.ex index 81271e05b..074a13bef 100644 --- a/lib/philomena_web/image_loader.ex +++ b/lib/philomena_web/image_loader.ex @@ -29,10 +29,6 @@ defmodule PhilomenaWeb.ImageLoader do |> load_tags() |> render_bodies(conn) - user = conn.assigns.current_user - filter = conn.assigns.compiled_filter - filters = create_filters(conn, user, filter) - %{query: query, sorts: sort} = sorts.(body) definition = @@ -42,7 +38,7 @@ defmodule PhilomenaWeb.ImageLoader do query: %{ bool: %{ must: query, - must_not: filters + must_not: filters(conn) } }, sort: sort @@ -53,6 +49,21 @@ defmodule PhilomenaWeb.ImageLoader do {definition, tags} end + def reverse_filter(conn) do + %{ + bool: %{ + must_not: filters(conn) + } + } + end + + defp filters(conn) do + user = conn.assigns.current_user + filter = conn.assigns.compiled_filter + + create_filters(conn, user, filter) + end + defp create_filters(conn, user, filter) do show_hidden? = Canada.Can.can?(user, :hide, %Image{}) del = conn.params["del"] diff --git a/lib/philomena_web/templates/search/reverse/index.html.slime b/lib/philomena_web/templates/search/reverse/index.html.slime index 7e7146003..6390fdceb 100644 --- a/lib/philomena_web/templates/search/reverse/index.html.slime +++ b/lib/philomena_web/templates/search/reverse/index.html.slime @@ -3,11 +3,9 @@ h1 Reverse Search = form_for @changeset, ~p"/search/reverse", [multipart: true, as: :image], fn f -> .walloftext p - ' Basic image similarity search. 
Finds uploaded images similar to the one - ' provided based on simple intensities and uses the median frame of - ' animations; very low contrast images (such as sketches) will produce - ' poor results and, regardless of contrast, results may include seemingly - ' random images that look very different. + ' Advanced image similarity search. Finds uploaded images similar to the one + ' provided based on perceptual features and uses the median frame of + ' animations. .image-other #js-image-upload-previews @@ -26,14 +24,7 @@ h1 Reverse Search .field-error-js.hidden.js-scraper - h4 Optional settings - - .field - = label f, :distance, "Match distance (suggested values: between 0.2 and 0.5)" - br - = number_input f, :distance, min: 0, max: 1, step: 0.01, class: "input" - = error_tag f, :distance - + = hidden_input f, :limit, value: @conn.assigns.image_pagination.page_size = error_tag f, :limit .field diff --git a/mix.lock b/mix.lock index 288b5a248..969af3bdd 100644 --- a/mix.lock +++ b/mix.lock @@ -63,7 +63,7 @@ "redix": {:hex, :redix, "1.5.1", "a2386971e69bf23630fb3a215a831b5478d2ee7dc9ea7ac811ed89186ab5d7b7", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:nimble_options, "~> 0.5.0 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "85224eb2b683c516b80d472eb89b76067d5866913bf0be59d646f550de71f5c4"}, "remote_ip": {:hex, :remote_ip, "1.2.0", "fb078e12a44414f4cef5a75963c33008fe169b806572ccd17257c208a7bc760f", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "2ff91de19c48149ce19ed230a81d377186e4412552a597d6a5137373e5877cb7"}, "req": {:hex, :req, "0.5.8", "50d8d65279d6e343a5e46980ac2a70e97136182950833a1968b371e753f6a662", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, 
"~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "d7fc5898a566477e174f26887821a3c5082b243885520ee4b45555f5d53f40ef"}, - "rustler": {:hex, :rustler, "0.35.0", "1e2e379e1150fab9982454973c74ac9899bd0377b3882166ee04127ea613b2d9", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "a176bea1bb6711474f9dfad282066f2b7392e246459bf4e29dfff6d828779fdf"}, + "rustler": {:hex, :rustler, "0.35.1", "ec81961ef9ee833d721dafb4449cab29b16b969a3063a842bb9e3ea912f6b938", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "3713b2e70e68ec2bfa8291dfd9cb811fe64a770f254cd9c331f8b34fa7989115"}, "scrivener": {:hex, :scrivener, "2.7.2", "1d913c965ec352650a7f864ad7fd8d80462f76a32f33d57d1e48bc5e9d40aba2", [:mix], [], "hexpm", "7866a0ec4d40274efbee1db8bead13a995ea4926ecd8203345af8f90d2b620d9"}, "scrivener_ecto": {:git, "https://github.com/krns/scrivener_ecto.git", "eaad1ddd86a9c8ffa422479417221265a0673777", [ref: "eaad1ddd86a9c8ffa422479417221265a0673777"]}, "secure_compare": {:hex, :secure_compare, "0.1.0", "01b3c93c8edb696e8a5b38397ed48e10958c8a5ec740606656445bcbec0aadb8", [:mix], [], "hexpm", "6391a49eb4a6182f0d7425842fc774bbed715e78b2bfb0c83b99c94e02c78b5c"}, diff --git a/native/philomena/Cargo.lock b/native/philomena/Cargo.lock index 72be95b48..ec1ac5818 100644 --- a/native/philomena/Cargo.lock +++ 
b/native/philomena/Cargo.lock @@ -2,12 +2,32 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + [[package]] name = "adler2" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -17,6 +37,61 @@ dependencies = [ "memchr", ] +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys 0.59.0", +] + +[[package]] +name = "anyhow" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" + [[package]] name = "arbitrary" version = "1.4.1" @@ -26,17 +101,91 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "async-trait" +version = "0.1.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + [[package]] name = "base64" version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bon" -version = "3.3.0" +version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f265cdb2e8501f1c952749e78babe8f1937be92c98120e5f78fc72d634682bad" +checksum = "fe7acc34ff59877422326db7d6f2d845a582b16396b6b08194942bf34c6528ab" dependencies = [ "bon-macros", "rustversion", @@ -44,9 +193,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.3.0" +version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38aa5c627cd7706490e5b003d685f8b9d69bc343b1a00b9fdd01e75fdf6827cf" +checksum = "4159dd617a7fbc9be6a692fe69dc2954f8e6bb6bb5e4d7578467441390d77fd0" dependencies = [ "darling", "ident_case", @@ -63,28 +212,68 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +[[package]] +name = "bytemuck" +version = "1.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "caseless" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808dab3318747be122cb31d36de18d4d1c81277a76f8332a02b81a3d73463d7f" +checksum = "8b6fd507454086c8edfd769ca6ada439193cdb209c7681712ef6275cccbfe5d8" dependencies = [ - "regex", "unicode-normalization", ] [[package]] name = "cc" -version = "1.2.3" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27f657647bcff5394bf56c7317665bbf790a137a50eaaa5c6bfbb9e27a518f2d" +checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" dependencies = [ + "jobserver", + "libc", "shlex", ] @@ -94,6 +283,62 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] +name = "clap" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9560b07a799281c7e0958b9296854d6fafd4c5f31444a7e5bb1ad6dde5ccf1bd" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874e0dd3eb68bf99058751ac9712f622e61e6f393a94f7128fa26e3f02f5c7cd" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "comrak" version = "0.29.0" @@ -110,6 +355,21 @@ dependencies = [ "unicode_categories", ] +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + +[[package]] +name = "cpufeatures" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -121,9 +381,25 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = 
"0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] [[package]] name = "darling" @@ -160,6 +436,15 @@ dependencies = [ "syn", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_arbitrary" version = "1.4.1" @@ -177,6 +462,17 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00" +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -188,18 +484,98 @@ dependencies = [ "syn", ] +[[package]] +name = "educe" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4bd92664bf78c4d3dba9b7cdafce6fa15b13ed3ed16175218196942e99168a8" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "entities" version = "1.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" +[[package]] +name = "enum-ordinalize" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea0dcfa4e54eeb516fe454635a95753ddd39acda650ce703031c6973e315dd5" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + [[package]] name = "equivalent" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + [[package]] name = "flate2" version = "1.0.35" @@ -226,52 +602,193 @@ dependencies = [ ] [[package]] -name = "hashbrown" -version = "0.15.2" +name = "futures" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] [[package]] -name = "heck" -version = "0.5.0" +name = "futures-channel" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] [[package]] -name = "http" -version = "0.2.12" +name = "futures-core" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] -name = "icu_collections" -version = "1.5.0" +name = "futures-executor" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", + "futures-core", + "futures-task", + "futures-util", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "futures-io" +version = "0.3.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] [[package]] name = "icu_locid_transform" @@ 
-393,6 +910,20 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "image" +version = "0.25.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6f44aed642f18953a158afeb30206f4d50da59fbc66ecb53c66488de73563b" +dependencies = [ + "bytemuck", + "byteorder-lite", + "num-traits", + "png", + "zune-core", + "zune-jpeg", +] + [[package]] name = "indexmap" version = "2.7.0" @@ -403,11 +934,29 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + [[package]] name = "inventory" -version = "0.3.15" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b31349d02fe60f80bbbab1a9402364cad7460626d6030494b08ac4a2075bf81" +dependencies = [ + "rustversion", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" @@ -435,6 +984,15 @@ dependencies = [ "libc", ] +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.76" @@ -445,11 +1003,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" -version = "0.2.168" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libloading" @@ -461,12 +1025,28 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "litemap" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "lockfree-object-pool" version = "0.1.6" @@ -479,6 +1059,51 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "matrixmultiply" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "mediaproc" +version = "0.1.0" +dependencies = [ + "once_cell", + "serde", + "tarpc", + "tokio", +] + +[[package]] +name = "mediaproc_client" +version = "0.1.0" +dependencies = [ + "clap", + "mediaproc", + "tokio", +] + +[[package]] +name = "mediaproc_server" +version = "0.1.0" +dependencies = [ + "clap", + "env_logger", + "futures", + "image", + "mediaproc", + "tarpc", + "tch", + "tempfile", + "tokio", + "tracing", +] + [[package]] name = "memchr" version = "2.7.4" @@ -487,11 +1112,78 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = 
"miniz_oxide" -version = "0.8.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" dependencies = [ "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.52.0", +] + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" 
+dependencies = [ + "memchr", ] [[package]] @@ -500,6 +1192,89 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "opentelemetry" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "570074cc999d1a58184080966e5bd3bf3a9a4af650c3b05047c2621e7405cd17" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "once_cell", + "pin-project-lite", + "thiserror 1.0.69", +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cefe0543875379e47eb5f1e68ff83f45cc41366a92dfd0d073d513bf68e9a05" + +[[package]] +name = "opentelemetry_sdk" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c627d9f4c9cdc1f21a29ee4bfbd6028fcb8bcf2a857b43f3abdf72c9c862f3" +dependencies = [ + "async-trait", + "futures-channel", + "futures-executor", + "futures-util", + "once_cell", + "opentelemetry", + "percent-encoding", + "rand", + "thiserror 1.0.69", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "password-hash" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" +dependencies = [ + "base64ct", + "rand_core", + 
"subtle", +] + +[[package]] +name = "pbkdf2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" +dependencies = [ + "digest", + "hmac", + "password-hash", + "sha2", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -510,229 +1285,757 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" name = "philomena" version = "0.3.0" dependencies = [ - "base64", + "base64 0.21.7", "comrak", "http", "jemallocator", + "mediaproc", "once_cell", "regex", - "ring", + "ring 0.16.20", "rustler", + "tokio", "url", - "zip", + "zip 2.2.2", ] [[package]] -name = "prettyplease" -version = "0.2.25" +name = "pin-project" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e2ec53ad785f4d35dac0adea7f7dc6f1bb277ad84a680c7afefeae05d1f5916" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" +checksum = "d56a66c0c55993aa927429d0f8a0abfd74f084e4d9c192cffed01e418d83eefb" dependencies = [ "proc-macro2", + "quote", "syn", ] [[package]] -name = "proc-macro2" -version = "1.0.92" +name = "pin-project-lite" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "png" +version = "0.17.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" dependencies = [ - "unicode-ident", + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", ] [[package]] -name = "quote" -version = "1.0.37" +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "483f8c21f64f3ea09fe0f30f5d48c3e8eefe5dac9129f0075f76593b4c1da705" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "redox_syscall" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + 
"once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom", + "libc", + "spin 0.9.8", + "untrusted 0.9.0", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustix" +version = "0.38.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustler" +version = "0.35.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9f6bb374bf0a1431cff92ee6a89e39b9978fa4dbccc4137605be4ed5118779" +dependencies = [ + "inventory", + "libloading", + "regex-lite", + "rustler_codegen", +] + +[[package]] +name = "rustler_codegen" +version = "0.35.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7914359a19fff34b2f0e9d4b7d4b02ca5ee597e52804c7472802b25e1ec543e" +dependencies = [ + "heck", + "inventory", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "rustls" +version = "0.23.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" +dependencies = [ + "log", + "once_cell", + "ring 0.17.8", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring 0.17.8", + "rustls-pki-types", + "untrusted 0.9.0", +] + +[[package]] +name = "rustversion" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "safetensors" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93279b86b3de76f820a8854dd06cbc33cfa57a417b19c47f6a25280112fb1df" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.135" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sha1" +version 
= "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "slug" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724" +dependencies = [ + "deunicode", + "wasm-bindgen", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = 
"c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", + "quote", + "syn", ] [[package]] -name = "regex" -version = "1.11.1" +name = "tarpc" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "14d1be17be018ebeec4c489449adb5ef227746775974c311ce79e09886ef83c7" dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", + "anyhow", + "fnv", + "futures", + "humantime", + "opentelemetry", + "opentelemetry-semantic-conventions", + "pin-project", + "rand", + "serde", + "static_assertions", + "tarpc-plugins", + "thiserror 1.0.69", + "tokio", + "tokio-serde", + "tokio-util", + "tracing", + "tracing-opentelemetry", ] [[package]] -name = "regex-automata" -version = "0.4.9" +name = "tarpc-plugins" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "e0e3d9676af494694e11a3e367a4bfa7f6d1d5566bd0fe9aceb4aa9281122ab8" dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "regex-lite" -version = "0.1.6" +name = "tch" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" +checksum = "bb3500c87ef72447c23b33ed6f15fac45a616b09bcac53e62e0e4386bddb3b9d" +dependencies = [ + "half", + "lazy_static", + "libc", + "ndarray", + "rand", + "safetensors", + "thiserror 1.0.69", + "torch-sys", + "zip 0.6.6", +] [[package]] -name = "regex-syntax" -version = "0.8.5" +name = "tempfile" +version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = 
"9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704" +dependencies = [ + "cfg-if", + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] [[package]] -name = "ring" -version = "0.16.20" +name = "thiserror" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "cc", - "libc", - "once_cell", - "spin", - "untrusted", - "web-sys", - "winapi", + "thiserror-impl 1.0.69", ] [[package]] -name = "rustler" -version = "0.35.0" +name = "thiserror" +version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b705f2c3643cc170d8888cb6bad589155d9c0248f3104ef7a04c2b7ffbaf13fc" +checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" dependencies = [ - "inventory", - "libloading", - "regex-lite", - "rustler_codegen", + "thiserror-impl 2.0.9", ] [[package]] -name = "rustler_codegen" -version = "0.35.0" +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ad56caff00562948bd6ac33c18dbc579e5a1bbee2d7f2f54073307e57f6b57a" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "heck", - "inventory", "proc-macro2", "quote", "syn", ] [[package]] -name = "rustversion" -version = "1.0.18" +name = "thiserror-impl" +version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" +checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "serde" -version = "1.0.215" +name = "thread_local" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" dependencies = [ - "serde_derive", + "cfg-if", + "once_cell", ] [[package]] -name = "serde_derive" -version = "1.0.215" +name = "time" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ - "proc-macro2", - "quote", - "syn", + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", ] [[package]] -name = "shlex" -version = "1.3.0" +name = "time-core" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] -name = "simd-adler32" -version = "0.3.7" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] [[package]] -name = "slug" -version = "0.1.6" +name = "tinyvec" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724" +checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" dependencies = [ - "deunicode", - "wasm-bindgen", + "tinyvec_macros", ] [[package]] -name = "smallvec" -version = "1.13.2" +name = "tinyvec_macros" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = 
"1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] -name = "spin" -version = "0.5.2" +name = "tokio" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] [[package]] -name = "stable_deref_trait" -version = "1.2.0" +name = "tokio-macros" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "strsim" -version = "0.11.1" +name = "tokio-serde" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +checksum = "caf600e7036b17782571dd44fa0a5cea3c82f60db5137f774a325a76a0d6852b" +dependencies = [ + "bincode", + "bytes", + "educe", + "futures-core", + "futures-sink", + "pin-project", + "serde", + "serde_json", +] [[package]] -name = "syn" -version = "2.0.90" +name = "tokio-util" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "slab", + "tokio", ] [[package]] -name = "synstructure" -version = "0.13.1" +name = "torch-sys" +version = "0.18.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +checksum = "61b87ed41261d4278060c3ba3e735c224687cf312403e4565f2ca75310279d73" dependencies = [ - "proc-macro2", - "quote", - "syn", + "anyhow", + "cc", + "libc", + "serde", + "serde_json", + "ureq", + "zip 0.6.6", ] [[package]] -name = "thiserror" -version = "2.0.6" +name = "tracing" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ - "thiserror-impl", + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", ] [[package]] -name = "thiserror-impl" -version = "2.0.6" +name = "tracing-attributes" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", @@ -740,29 +2043,41 @@ dependencies = [ ] [[package]] -name = "tinystr" -version = "0.7.6" +name = "tracing-core" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ - "displaydoc", - "zerovec", + "once_cell", + "valuable", ] [[package]] -name = "tinyvec" -version = "1.8.0" +name = "tracing-opentelemetry" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "dc58af5d3f6c5811462cabb3289aec0093f7338e367e5a33d28c0433b3c7360b" dependencies = [ - "tinyvec_macros", + "js-sys", + "once_cell", + "opentelemetry", + 
"opentelemetry_sdk", + "tracing", + "tracing-core", + "tracing-subscriber", + "web-time", ] [[package]] -name = "tinyvec_macros" -version = "0.1.1" +name = "tracing-subscriber" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "sharded-slab", + "thread_local", + "tracing-core", +] [[package]] name = "typed-arena" @@ -770,6 +2085,12 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "unicode-ident" version = "1.0.14" @@ -797,6 +2118,30 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "url", + "webpki-roots", +] + [[package]] name = "url" version = "2.5.4" @@ -820,6 +2165,30 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "wasm-bindgen" version = "0.2.99" @@ -884,6 +2253,25 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "winapi" version = "0.3.9" @@ -906,6 +2294,24 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -1006,6 +2412,27 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" version = "0.1.5" @@ -1027,6 +2454,12 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + [[package]] name = "zerovec" version = "0.10.4" @@ -1051,9 +2484,29 @@ dependencies = [ [[package]] name = "zip" -version = "2.2.1" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +dependencies = [ + "aes", + "byteorder", + "bzip2", + "constant_time_eq", + "crc32fast", + "crossbeam-utils", + "flate2", + "hmac", + "pbkdf2", + "sha1", + "time", + "zstd", +] + +[[package]] +name = "zip" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d52293fc86ea7cf13971b3bb81eb21683636e7ae24c729cdaf1b7c4157a352" +checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45" dependencies = [ "arbitrary", "crc32fast", @@ -1062,7 +2515,7 @@ dependencies = [ "flate2", "indexmap", "memchr", - "thiserror", + "thiserror 2.0.9", "zopfli", ] @@ -1079,3 +2532,47 @@ dependencies = [ 
"once_cell", "simd-adler32", ] + +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "zune-core" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" + +[[package]] +name = "zune-jpeg" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99a5bab8d7dedf81405c4bb1f2b83ea057643d9cb28778cea9eecddeedd2e028" +dependencies = [ + "zune-core", +] diff --git a/native/philomena/Cargo.toml b/native/philomena/Cargo.toml index 0e42330e3..3a9fec3fb 100644 --- a/native/philomena/Cargo.toml +++ b/native/philomena/Cargo.toml @@ -9,15 +9,25 @@ name = "philomena" path = "src/lib.rs" crate-type = ["dylib"] +[workspace] +members = [ + "mediaproc", + "mediaproc_client", + "mediaproc_server", +] +default-members = ["mediaproc"] + [dependencies] base64 = "0.21" comrak = { git = "https://github.com/philomena-dev/comrak", branch = "philomena-0.29.2", default-features = false } http = "0.2" jemallocator = { version = "0.5.0", features = ["disable_initial_exec_tls"] } +mediaproc = { path = "./mediaproc" } once_cell = "1.20" regex = "1" ring = "0.16" rustler = "0.35" +tokio = { version = "1.0", features = ["full"] } url = "2.5" zip 
= { version = "2.2.0", features = ["deflate"], default-features = false } diff --git a/native/philomena/mediaproc/Cargo.toml b/native/philomena/mediaproc/Cargo.toml new file mode 100644 index 000000000..827294642 --- /dev/null +++ b/native/philomena/mediaproc/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "mediaproc" +version = "0.1.0" +edition = "2021" + +[dependencies] +once_cell = "1.20" +serde = { version = "1.0", features = ["derive"] } +tarpc = { version = "0.35", features = ["full"] } +tokio = { version = "1.0", features = ["full"] } diff --git a/native/philomena/mediaproc/src/client.rs b/native/philomena/mediaproc/src/client.rs new file mode 100644 index 000000000..90d86073e --- /dev/null +++ b/native/philomena/mediaproc/src/client.rs @@ -0,0 +1,150 @@ +use std::collections::{HashMap, HashSet}; +use std::ffi::OsString; +use std::path::Path; +use std::time::{Duration, Instant}; + +use crate::{CommandReply, ExecuteCommandError, FileMap, MediaProcessorClient}; +use once_cell::sync::Lazy; +use tarpc::context::Context; + +#[derive(Default)] +struct CallParameters { + /// Mapping from replaced name to original name. + replacements: HashMap, + /// List of post-processed arguments. + arguments: Vec, + /// Mapping of replaced name to file contents. + file_map: FileMap, +} + +/// List of file extensions which can be forwarded. +static FORWARDED_EXTS: Lazy> = Lazy::new(|| { + vec![ + "gif", "jpg", "jpeg", "png", "svg", "webm", "webp", "mp4", "icc", + ] + .into_iter() + .map(Into::into) + .collect() +}); + +fn forwarded_ext(path: &Path) -> Option<&str> { + match path.extension() { + Some(ext) if FORWARDED_EXTS.contains(ext) => ext.to_str(), + _ => None, + } +} + +fn create_replacements(arguments: impl Iterator) -> CallParameters { + use std::fs::read; + + // Maps original name to replaced name. 
+ let mut processed = HashMap::::new(); + let mut counter: usize = 0; + + let mut output = CallParameters::default(); + + output.arguments = arguments + .map(|arg| { + let path = Path::new(&arg); + + // Avoid adding additional replacements if the same file is passed multiple times. + if let Some(replaced_name) = processed.get(&arg) { + return replaced_name.clone(); + } + + // Only try things that look like paths. + if !path.is_absolute() { + return arg; + } + + // Don't forward paths that don't exist or can't be read. + let Ok(contents) = read(path) else { + return arg; + }; + + // Only forward extension if extension is in allow list. + let replaced_name = match forwarded_ext(path) { + Some(ext) => format!("{}.{}", counter, ext), + None => format!("{}", counter), + }; + + counter = counter.saturating_add(1); + + processed.insert(arg.clone(), replaced_name.clone()); // original -> replaced + output.replacements.insert(replaced_name.clone(), arg); // replaced -> original + output.file_map.insert(replaced_name.clone(), contents); // replaced -> [contents] + + replaced_name + }) + .collect(); + + output +} + +fn update_replacements( + replacements: HashMap, + file_map: FileMap, +) -> Result<(), ExecuteCommandError> { + use std::fs::write; + + for (replaced_name, contents) in file_map { + let original_name = replacements + .get(&replaced_name) + .ok_or(ExecuteCommandError::InvalidFileMapName)?; + + write(original_name, contents).map_err(|_| ExecuteCommandError::LocalFilesystemError)?; + } + + Ok(()) +} + +pub fn context_with_deadline(secs_from_now: u64) -> Context { + let mut context = Context::current(); + context.deadline = Instant::now() + Duration::from_secs(secs_from_now); + context +} + +pub fn context_with_1_hour_deadline() -> Context { + context_with_deadline(60 * 60) +} + +pub fn context_with_10_second_deadline() -> Context { + context_with_deadline(10) +} + +pub async fn execute_command( + client: &MediaProcessorClient, + program: String, + arguments: Vec, + 
ctx: Context, +) -> Result { + let call_params = create_replacements(arguments.into_iter()); + let (reply, file_map) = client + .execute_command(ctx, program, call_params.arguments, call_params.file_map) + .await + .map_err(|_| ExecuteCommandError::UnknownError)??; + + update_replacements(call_params.replacements, file_map)?; + + Ok(reply) +} + +pub async fn connect_to_socket_server(server_addr: &str) -> Option { + let codec = tarpc::tokio_serde::formats::Bincode::default; + + for addr in tokio::net::lookup_host(server_addr).await.ok()? { + let mut transport = tarpc::serde_transport::tcp::connect(addr, codec); + transport.config_mut().max_frame_length(usize::MAX); + + let transport = match transport.await { + Ok(transport) => transport, + _ => continue, + }; + + return Some( + MediaProcessorClient::new(tarpc::client::Config::default(), transport).spawn(), + ); + } + + None +} diff --git a/native/philomena/mediaproc/src/lib.rs b/native/philomena/mediaproc/src/lib.rs new file mode 100644 index 000000000..10faf7c48 --- /dev/null +++ b/native/philomena/mediaproc/src/lib.rs @@ -0,0 +1,82 @@ +use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; + +pub mod client; + +#[tarpc::service] +pub trait MediaProcessor { + /// Executes a command on the media processor server. + async fn execute_command( + program: String, + arguments: Vec, + file_map: FileMap, + ) -> Result<(CommandReply, FileMap), ExecuteCommandError>; + + /// Runs feature extraction on an image file bytes (PNG or JPEG). + async fn get_features(image: Vec) -> Result, FeatureExtractionError>; +} + +/// Errors which can occur during command execution. +#[derive(Debug, Deserialize, Serialize)] +pub enum ExecuteCommandError { + /// Failed to connect to server. + ConnectionError, + /// Requested program was not allowed to be executed. + UnpermittedProgram(String), + /// Failed to launch program. 
+ ExecutionError, + /// File map name was rejected: the server refuses ".", and any name containing "..", '/' or '\\'; the client also reports this when a reply names a file it never sent. + InvalidFileMapName, + /// Filesystem error on the server: creating the temporary directory, or writing or reading back a mapped file, failed. + RemoteFilesystemError, + /// Filesystem error on the client: writing a returned file back to its original path failed. + LocalFilesystemError, + /// Unknown error. + UnknownError, +} + +/// Errors which can occur during image feature extraction. +#[derive(Debug, Deserialize, Serialize)] +pub enum FeatureExtractionError { + /// Failed to connect to server. + ConnectionError, + /// Generic filesystem error. + LocalFilesystemError, + /// Unrecognized image format. + UnknownImageFormat, + /// Failed to decode the image. + ImageDecodeError, +} + +/// Enumeration of permitted program names. +pub static PERMITTED_PROGRAMS: Lazy> = Lazy::new(|| { + vec![ + "convert", + "ffprobe", + "ffmpeg", + "file", + "gifsicle", + "identify", + "image-intensities", + "jpegtran", + "mediastat", + "mediathumb", + "optipng", + "safe-rsvg-convert", + "svgstat", + ] + .into_iter() + .collect() +}); + +/// Mapping between file name and file contents. +pub type FileMap = HashMap>; + +/// Output reply after command execution has finished. 
+#[derive(Debug, Deserialize, Serialize)] +pub struct CommandReply { + pub status: u8, + pub stdout: Vec, + pub stderr: Vec, +} diff --git a/native/philomena/mediaproc_client/Cargo.toml b/native/philomena/mediaproc_client/Cargo.toml new file mode 100644 index 000000000..19c782974 --- /dev/null +++ b/native/philomena/mediaproc_client/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "mediaproc_client" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "4.5", features = ["derive"] } +mediaproc = { path = "../mediaproc" } +tokio = { version = "1.0", features = ["full"] } diff --git a/native/philomena/mediaproc_client/src/main.rs b/native/philomena/mediaproc_client/src/main.rs new file mode 100644 index 000000000..22949a41a --- /dev/null +++ b/native/philomena/mediaproc_client/src/main.rs @@ -0,0 +1,99 @@ +use std::io::Write; +use std::process::ExitCode; + +use clap::{Parser, Subcommand}; +use mediaproc::client; +use mediaproc::MediaProcessorClient; + +#[derive(Parser, Debug)] +#[command(version, about = "RPC Media Processor Client", long_about = None)] +struct Arguments { + /// Server address to connect to, like localhost:1500 + server_addr: String, + + /// Subcommand to execute. + #[command(subcommand)] + invocation_type: InvocationType, +} + +#[derive(Subcommand, Debug)] +enum InvocationType { + /// Execute a command with the given arguments on the remote server. + ExecuteCommand { + /// Program name to execute. + /// + /// One of convert, ffprobe, ffmpeg, file, gifsicle, identify, image-intensities, + /// jpegtran, mediastat, mediathumb, optipng, safe-rsvg-convert, svgstat. + program: String, + /// Arguments to pass to program. + args: Vec, + }, + /// Get DINOv2 features from the given image file (PNG or JPEG). + ExtractFeatures { + /// Filename to extract from. 
+ file_name: String, + }, +} + +#[tokio::main(flavor = "current_thread")] +async fn main() -> ExitCode { + let args = Arguments::parse(); + let client = client::connect_to_socket_server(&args.server_addr) + .await + .expect("failed to connect to server"); + + match args.invocation_type { + InvocationType::ExecuteCommand { program, args } => { + run_command_client(&client, program, args).await + } + InvocationType::ExtractFeatures { file_name } => { + run_feature_extraction_client(&client, file_name).await + } + } +} + +async fn run_command_client( + client: &MediaProcessorClient, + program: String, + args: Vec, +) -> ExitCode { + let ctx = client::context_with_1_hour_deadline(); + let reply = client::execute_command(client, program, args, ctx) + .await + .unwrap(); + + write_then_drop(std::io::stderr(), reply.stderr); + write_then_drop(std::io::stdout(), reply.stdout); + + reply.status.into() +} + +fn write_then_drop(mut stream: impl Write, data: Vec) { + stream.write_all(&data).unwrap() +} + +async fn run_feature_extraction_client( + client: &MediaProcessorClient, + file_name: String, +) -> ExitCode { + let image = std::fs::read(file_name).unwrap(); + let features = client + .get_features(client::context_with_10_second_deadline(), image) + .await + .unwrap() + .unwrap(); + + // Manual intersperse implementation, until rust adds it properly + let mut started = false; + for component in features { + if started { + print!(" {}", component); + } else { + print!("{}", component); + started = true; + } + } + println!(); + + ExitCode::SUCCESS +} diff --git a/native/philomena/mediaproc_server/Cargo.toml b/native/philomena/mediaproc_server/Cargo.toml new file mode 100644 index 000000000..38df0a7a9 --- /dev/null +++ b/native/philomena/mediaproc_server/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "mediaproc_server" +version = "0.1.0" +edition = "2021" + +[dependencies] +env_logger = "0.11" +clap = { version = "4.5", features = ["derive"] } +futures = "0.3" +image = { 
version = "0.25.2", default-features = false, features = ["jpeg", "png"] } +mediaproc = { path = "../mediaproc" } +tarpc = { version = "0.35", features = ["full"] } +tch = { version = "0.18.1", features = ["download-libtorch"] } +tempfile = "3" +tokio = { version = "1.0", features = ["full"] } +tracing = "0.1" diff --git a/native/philomena/mediaproc_server/src/command_server.rs b/native/philomena/mediaproc_server/src/command_server.rs new file mode 100644 index 000000000..0108ef2cb --- /dev/null +++ b/native/philomena/mediaproc_server/src/command_server.rs @@ -0,0 +1,63 @@ +use std::collections::HashSet; +use std::os::unix::process::ExitStatusExt; + +use mediaproc::{CommandReply, ExecuteCommandError, FileMap, PERMITTED_PROGRAMS}; +use tokio::process::Command; + +fn validate_name(name: &str) -> Result<(), ExecuteCommandError> { + if name == "." || name.contains("..") || name.contains('/') || name.contains('\\') { + return Err(ExecuteCommandError::InvalidFileMapName); + } + + Ok(()) +} + +pub async fn execute_command( + program: String, + arguments: Vec, + file_map: FileMap, +) -> Result<(CommandReply, FileMap), ExecuteCommandError> { + use std::fs::{read, write}; + + // Check program name. + if !PERMITTED_PROGRAMS.contains(&program.as_ref()) { + return Err(ExecuteCommandError::UnpermittedProgram(program)); + } + + // Create a new temporary directory which we will work in. + let dir = tempfile::tempdir().map_err(|_| ExecuteCommandError::RemoteFilesystemError)?; + + // Verify and write out all files. + let mut files = HashSet::::new(); + for (name, contents) in file_map { + validate_name(&name)?; + files.insert(name.clone()); + + let path = dir.path().join(name); + write(path, contents).map_err(|_| ExecuteCommandError::RemoteFilesystemError)?; + } + + // Run the command. + let output = Command::new(program) + .args(arguments) + .current_dir(dir.path()) + .output() + .await + .map_err(|_| ExecuteCommandError::ExecutionError)?; + + // Read back all files. 
+ let mut file_map = FileMap::new(); + for name in files { + let path = dir.path().join(name.clone()); + let contents = read(path).map_err(|_| ExecuteCommandError::RemoteFilesystemError)?; + file_map.insert(name, contents); + } + + let reply = CommandReply { + status: output.status.into_raw() as u8, + stdout: output.stdout, + stderr: output.stderr, + }; + + Ok((reply, file_map)) +} diff --git a/native/philomena/mediaproc_server/src/dinov2.rs b/native/philomena/mediaproc_server/src/dinov2.rs new file mode 100644 index 000000000..d6c65b9ad --- /dev/null +++ b/native/philomena/mediaproc_server/src/dinov2.rs @@ -0,0 +1,106 @@ +use std::io::Cursor; +use tch::{CModule, Device, IValue, Tensor}; + +use super::io; +use crate::FeatureExtractionError; + +/// Each DINOv2 patch is 14x14 +pub const PATCH_DIM: i64 = 14; + +pub struct ModelResult { + pub patches: (i64, i64), + pub image: Tensor, + pub features: Tensor, + pub last_hidden_state: Tensor, +} + +fn infer(image: &Tensor, model: &CModule) -> (Tensor, Tensor) { + // These cases intentionally panic because their outputs depend on the model, + // not on the image file input, and invalid model format is not recoverable. 
+ let output = model + .forward_is(&[IValue::Tensor(image.shallow_clone())]) + .unwrap(); + + let mut results = match output { + IValue::Tuple(elements) if elements.len() == 2 => elements, + _ => unreachable!("expected (last_hidden_state, pooler_output)"), + }; + + let mut results = results.drain(..); + + match (results.next(), results.next()) { + (Some(IValue::Tensor(last_hidden_state)), Some(IValue::Tensor(pooler_output))) => { + (last_hidden_state, pooler_output) + } + _ => unreachable!("expected 2-tuple of tensors"), + } +} + +fn scaled_result(pooler_output: &Tensor) -> Tensor { + let scaled_norm = pooler_output.norm().pow_tensor_scalar(-1); + pooler_output.multiply(&scaled_norm) +} + +pub fn get_model_result( + image: R, + model: &CModule, + device: Device, +) -> Result +where + R: std::io::Read + std::io::Seek, +{ + // Decode the image into a tensor on the target device. + let image = io::load_image(image, device)?; + + // Features are unstable across different global scales, and + // somewhat stable across dimensional scales. + // + // Use 18 (252x252) instead of 16 (224x224) to produce a more detailed + // result and attention map at almost exactly the same computational cost. + // + // It is possible for highly non-square models to have meaningful feature extraction, + // but in practice it makes no difference identifying scales which keep the aspect + // ratio, and does not produce feature vectors which are similar enough to identify + // crops. + let image_scale = 1; + let patches = (18 * image_scale, 18 * image_scale); + + // Scale image into appropriate shape. + let image = io::resize_image_by_patch_count(image, patches, PATCH_DIM); + + // The pooler output is the [CLS] token generated by the model. + // It contains high-quality, robust features from the input image. 
+    let (last_hidden_state, pooler_output) = infer(&image, model);
+
+    Ok(ModelResult {
+        patches,
+        image,
+        features: scaled_result(&pooler_output.squeeze()),
+        last_hidden_state,
+    })
+}
+
+pub struct Executor {
+    device: Device,
+    model: CModule,
+}
+
+impl Executor {
+    pub fn new(model_path: &str) -> Option<Self> {
+        let (device, model) = io::device_and_model(model_path)?;
+        Some(Self { device, model })
+    }
+
+    pub fn extract(&self, image: &[u8]) -> Result<Vec<f32>, FeatureExtractionError> {
+        let image = Cursor::new(image);
+        let model_result = get_model_result(image, &self.model, self.device)?;
+        let features = model_result
+            .features
+            .iter::<f64>()
+            .unwrap()
+            .map(|f| f as f32)
+            .collect();
+
+        Ok(features)
+    }
+}
diff --git a/native/philomena/mediaproc_server/src/io.rs b/native/philomena/mediaproc_server/src/io.rs
new file mode 100644
index 000000000..43f8618b5
--- /dev/null
+++ b/native/philomena/mediaproc_server/src/io.rs
@@ -0,0 +1,113 @@
+use image::{DynamicImage, ImageBuffer, ImageReader, Pixel};
+use std::io::BufReader;
+use tch::{CModule, Device, Tensor};
+
+use crate::FeatureExtractionError;
+
+/// Selects CUDA when it is available and loads the TorchScript module at
+/// `model_path` onto that device; returns `None` if loading fails.
+pub fn device_and_model(model_path: &str) -> Option<(Device, CModule)> {
+    let device = Device::cuda_if_available();
+    let model = CModule::load_on_device(model_path, device).ok()?;
+
+    Some((device, model))
+}
+
+/// Flattens an `f32` image buffer into a tensor on `device`, then reshapes it
+/// from `[h, w, c]` and permutes it to `[c, h, w]`.
+fn into_tensor<P: Pixel<Subpixel = f32>>(
+    image: ImageBuffer<P, Vec<f32>>,
+    device: Device,
+) -> Tensor {
+    let w: i64 = image.width().into();
+    let h: i64 = image.height().into();
+    let c: i64 = P::CHANNEL_COUNT.into();
+
+    // Extra scope to ensure we eagerly drop the original image buffer
+    let pixels = {
+        let pixels: Vec<f32> = image.pixels().flat_map(|p| p.channels()).copied().collect();
+
+        Tensor::from_slice(&pixels)
+    };
+
+    pixels.to(device).reshape([h, w, c]).permute([2, 0, 1])
+}
+
+/// Converts a decoded image into a 3-channel `f32` tensor on `device`.
+/// Rgb8/Rgb16/Rgb32F/Luma8/Luma16 inputs are converted directly; every other
+/// variant is read as RGBA and its alpha is blended into the color channels.
+fn strip_transparency(image: DynamicImage, device: Device) -> Tensor {
+    let w: i64 = image.width().into();
+    let h: i64 = image.height().into();
+
+    match image {
+        DynamicImage::ImageRgb8(..)
+        | DynamicImage::ImageLuma8(..)
+        | DynamicImage::ImageLuma16(..)
+        | DynamicImage::ImageRgb16(..)
+        | DynamicImage::ImageRgb32F(..) => {
+            return into_tensor(image.into_rgb32f(), device);
+        }
+        _ => {}
+    };
+
+    // Get channels.
+    let (alpha, color) = {
+        let pixels = into_tensor(image.into_rgba32f(), device);
+        let alpha = pixels.slice(0, 3, 4, 1).broadcast_to([3, h, w]);
+        let color = pixels.slice(0, 0, 3, 1);
+
+        (alpha, color)
+    };
+
+    // Detect whether premultiplication should be applied by checking
+    // for channels with values above the alpha level.
+    //
+    // Note that the only input format which we can get where this would
+    // be relevant, PNG, explicitly says it does not carry premultiplied alpha,
+    // but many tools will store premultiplied alpha anyway...
+    let ones = Tensor::ones([3, h, w], (tch::Kind::Float, device));
+    let mask = alpha.where_self(&color.gt_tensor(&alpha).any(), &ones);
+    let color = color.multiply(&mask);
+
+    // Pure transparency is rescaled to be 8 steps blacker than black.
+    const ALPHA_LEVEL: f64 = 8.0 / 255.0;
+    const COLOR_LEVEL: f64 = 1.0 - ALPHA_LEVEL;
+
+    // Unwrap is guaranteed safe because the dimensions and data type match
+    color
+        .multiply_scalar(COLOR_LEVEL)
+        .f_add(&alpha.multiply_scalar(ALPHA_LEVEL))
+        .unwrap()
+}
+
+/// Decodes `image` (format guessed by `with_guessed_format`) into a tensor via
+/// `strip_transparency`. Returns `UnknownImageFormat` if the format guess
+/// errors and `ImageDecodeError` if decoding fails.
+pub fn load_image<R>(image: R, device: Device) -> Result<Tensor, FeatureExtractionError>
+where
+    R: std::io::Read + std::io::Seek,
+{
+    let image = BufReader::new(image);
+    let image = ImageReader::new(image)
+        .with_guessed_format()
+        .map_err(|_| FeatureExtractionError::UnknownImageFormat)?
+        .decode()
+        .map_err(|_| FeatureExtractionError::ImageDecodeError)?;
+
+    Ok(strip_transparency(image, device))
+}
+
+/// Bicubic resize to `size = (height, width)` with `align_corners = true`.
+fn resize_tensor(image: Tensor, size: (i64, i64)) -> Tensor {
+    image.upsample_bicubic2d([size.0, size.1], true, None, None)
+}
+
+/// Adds a leading dimension and resizes the image to
+/// `patches.0 * patch_dim` by `patches.1 * patch_dim` (height by width).
+pub fn resize_image_by_patch_count(image: Tensor, patches: (i64, i64), patch_dim: i64) -> Tensor {
+    let height = patches.0 * patch_dim;
+    let width = patches.1 * patch_dim;
+
+    resize_tensor(image.unsqueeze(0), (height, width))
+}
diff --git a/native/philomena/mediaproc_server/src/main.rs b/native/philomena/mediaproc_server/src/main.rs
new file mode 100644
index 000000000..0466fac99
--- /dev/null
+++ b/native/philomena/mediaproc_server/src/main.rs
@@ -0,0 +1,98 @@
+use std::net::SocketAddr;
+use std::sync::Arc;
+
+use clap::Parser;
+use dinov2::Executor;
+use futures::{future, Future, StreamExt};
+use mediaproc::{
+    CommandReply, ExecuteCommandError, FeatureExtractionError, FileMap, MediaProcessor,
+};
+use tarpc::context;
+use tarpc::server::Channel;
+
+mod command_server;
+mod dinov2;
+mod io;
+mod signal;
+
+#[derive(Parser, Debug)]
+#[command(version, about = "RPC Media Processor Server", long_about = None)]
+struct Arguments {
+    /// Socket address to bind to, like 127.0.0.1:1500
+    server_addr: SocketAddr,
+
+    /// DINOv2 with registers base model to load.
+    model_path: String,
+}
+
+/// RPC service state: a shared handle to the loaded feature extractor, cloned
+/// for every accepted connection.
+#[derive(Clone)]
+struct MediaProcessorServer(Arc<Executor>);
+
+impl MediaProcessor for MediaProcessorServer {
+    async fn execute_command(
+        self,
+        _: context::Context,
+        program: String,
+        arguments: Vec<String>,
+        file_map: FileMap,
+    ) -> Result<(CommandReply, FileMap), ExecuteCommandError> {
+        command_server::execute_command(program, arguments, file_map).await
+    }
+
+    async fn get_features(
+        self,
+        _: context::Context,
+        image: Vec<u8>,
+    ) -> Result<Vec<f32>, FeatureExtractionError> {
+        // Inference is synchronous and can run for a long time; tell the
+        // runtime this worker is about to block so other tasks keep running.
+        tokio::task::block_in_place(|| self.0.extract(&image))
+    }
+}
+
+/// Parses the command line, loads the model (panicking if that fails) and then
+/// runs the server.
+fn main() {
+    env_logger::init();
+
+    let args = Arguments::parse();
+    let executor = Executor::new(&args.model_path).expect("failed to load Torch JIT model");
+    let executor = Arc::new(executor);
+
+    serve(&args, executor);
+}
+
+/// Spawns `fut` onto the Tokio runtime; used as the `for_each` callback in `serve`.
+async fn spawn(fut: impl Future<Output = ()> + Send + 'static) {
+    tokio::spawn(fut);
+}
+
+/// Binds the Bincode-framed TCP listener on `args.server_addr` and serves
+/// `MediaProcessor` requests until the process exits. Panics if binding fails.
+/// Connections that fail to accept are skipped.
+#[tokio::main]
+async fn serve(args: &Arguments, executor: Arc<Executor>) {
+    signal::install_handlers();
+
+    let codec = tarpc::tokio_serde::formats::Bincode::default;
+    let mut listener = tarpc::serde_transport::tcp::listen(args.server_addr, codec)
+        .await
+        .unwrap();
+
+    // No frame size cap. NOTE(review): presumably so that whole media files
+    // fit into a single request - confirm.
+    listener.config_mut().max_frame_length(usize::MAX);
+    listener
+        // Ignore accept errors.
+        .filter_map(|r| future::ready(r.ok()))
+        .map(tarpc::server::BaseChannel::with_defaults)
+        .map(move |channel| {
+            let server = MediaProcessorServer(executor.clone());
+
+            tokio::spawn(channel.execute(server.serve()).for_each(spawn));
+        })
+        .collect()
+        .await
+}
diff --git a/native/philomena/mediaproc_server/src/signal.rs b/native/philomena/mediaproc_server/src/signal.rs
new file mode 100644
index 000000000..68135d362
--- /dev/null
+++ b/native/philomena/mediaproc_server/src/signal.rs
@@ -0,0 +1,18 @@
+use tokio::signal::unix::{signal, SignalKind};
+
+/// Spawns a task that waits for SIGTERM or SIGINT and then terminates the
+/// process with exit status 1. Must be called from within a Tokio runtime;
+/// panics if the signal handlers cannot be registered.
+pub fn install_handlers() {
+    let mut sigterm = signal(SignalKind::terminate()).unwrap();
+    let mut sigint = signal(SignalKind::interrupt()).unwrap();
+
+    tokio::spawn(async move {
+        tokio::select! {
+            _ = sigterm.recv() => tracing::debug!("Received SIGTERM"),
+            _ = sigint.recv() => tracing::debug!("Received SIGINT"),
+        };
+
+        std::process::exit(1);
+    });
+}
diff --git a/native/philomena/src/asyncnif.rs b/native/philomena/src/asyncnif.rs
new file mode 100644
index 000000000..faf845844
--- /dev/null
+++ b/native/philomena/src/asyncnif.rs
@@ -0,0 +1,31 @@
+use once_cell::sync::Lazy;
+use rustler::{Atom, Env, OwnedEnv, Term};
+use std::future::Future;
+use std::marker::Send;
+use tokio::runtime::Runtime;
+
+/// Process-wide Tokio runtime, created on first use, that drives NIF futures.
+static RUNTIME: Lazy<Runtime> = Lazy::new(|| Runtime::new().unwrap());
+
+/// Spawns `fut` on the shared runtime and returns `:ok` to the caller right
+/// away. When the future completes, `w` encodes its output into a term and the
+/// term is sent as a message to the process that invoked the NIF. A failed
+/// send is ignored.
+pub fn call_async<F, T, W>(caller_env: Env, fut: F, w: W) -> Atom
+where
+    F: Future<Output = T> + Send + 'static,
+    W: for<'a> FnOnce(Env<'a>, T) -> Term<'a>,
+    W: Send + 'static,
+{
+    let pid = caller_env.pid();
+
+    RUNTIME.spawn(async move {
+        let output = fut.await;
+        let owned_env = OwnedEnv::new();
+        owned_env.run(move |env| {
+            let _ = env.send(&pid, w(env, output));
+        });
+    });
+
+    rustler::types::atom::ok()
+}
diff --git a/native/philomena/src/lib.rs b/native/philomena/src/lib.rs
index 6f7f72b2b..0de54fe5b 100644
--- a/native/philomena/src/lib.rs
+++ b/native/philomena/src/lib.rs
@@ -1,10 +1,12 @@
 use jemallocator::Jemalloc;
-use rustler::{Atom, Binary};
+use rustler::{Atom, Binary, Env};
 use std::collections::HashMap;
 
+mod asyncnif;
 mod camo;
 mod domains;
 mod markdown;
+mod remote;
 #[cfg(test)]
 mod tests;
 mod zip;
@@ -35,6 +37,25 @@ fn camo_image_url(input: &str) -> String {
     camo::image_url_careful(input)
 }
 
+// Remote NIF wrappers.
+
+#[rustler::nif]
+fn async_get_features(env: Env, server_addr: String, path: String) -> Atom {
+    let fut = remote::get_features(server_addr, path);
+    asyncnif::call_async(env, fut, remote::get_features_reply_with_env)
+}
+
+#[rustler::nif]
+fn async_process_command(
+    env: Env,
+    server_addr: String,
+    program: String,
+    arguments: Vec<String>,
+) -> Atom {
+    let fut = remote::process_command(server_addr, program, arguments);
+    asyncnif::call_async(env, fut, remote::command_reply_with_env)
+}
+
 // Zip NIF wrappers.
 
 #[rustler::nif]
diff --git a/native/philomena/src/remote.rs b/native/philomena/src/remote.rs
new file mode 100644
index 000000000..1d5533db4
--- /dev/null
+++ b/native/philomena/src/remote.rs
@@ -0,0 +1,114 @@
+use mediaproc::client;
+use mediaproc::{CommandReply, FeatureExtractionError};
+use rustler::{atoms, Encoder, Env, NifStruct, OwnedBinary, Term};
+
+atoms! {
+    nil,
+    ok,
+    error,
+    get_features_reply,
+    process_command_reply,
+}
+
+/// Elixir-facing mirror of `CommandReply`, encoded as a
+/// `%Philomena.Native.CommandReply{}` struct.
+#[derive(NifStruct)]
+#[module = "Elixir.Philomena.Native.CommandReply"]
+struct CommandReply_<'a> {
+    stdout: Term<'a>,
+    stderr: Term<'a>,
+    status: u8,
+}
+
+/// Copies `data` into a newly allocated Erlang binary term, or returns the
+/// `nil` atom if the binary cannot be allocated.
+fn binary_or_nil<'a>(env: Env<'a>, data: Vec<u8>) -> Term<'a> {
+    match OwnedBinary::new(data.len()) {
+        Some(mut binary) => {
+            binary.copy_from_slice(&data);
+            binary.release(env).to_term(env)
+        }
+        None => nil().to_term(env),
+    }
+}
+
+/// Runs `program` with `arguments` on the media processor at `server_addr`.
+/// Connection and RPC failures are not returned as errors; they are reported
+/// as a reply with status 255 and a description in `stderr`.
+pub async fn process_command(
+    server_addr: String,
+    program: String,
+    arguments: Vec<String>,
+) -> CommandReply {
+    let client = match client::connect_to_socket_server(&server_addr).await {
+        Some(client) => client,
+        None => {
+            return CommandReply {
+                stdout: vec![],
+                stderr: "failed to connect to server".into(),
+                status: 255,
+            }
+        }
+    };
+
+    let ctx = client::context_with_1_hour_deadline();
+    match client::execute_command(&client, program, arguments, ctx).await {
+        Ok(reply) => reply,
+        Err(err) => CommandReply {
+            stdout: vec![],
+            stderr: format!("failed to execute command: {err:?}").into(),
+            status: 255,
+        },
+    }
+}
+
+/// Reads the file at `path` and asks the media processor at `server_addr` for
+/// its feature vector. Returns `ConnectionError` if connecting or the RPC
+/// fails and `LocalFilesystemError` if the file cannot be read.
+pub async fn get_features(
+    server_addr: String,
+    path: String,
+) -> Result<Vec<f32>, FeatureExtractionError> {
+    let client = match client::connect_to_socket_server(&server_addr).await {
+        Some(client) => client,
+        None => return Err(FeatureExtractionError::ConnectionError),
+    };
+
+    // Read on the blocking pool so file I/O does not stall an async worker.
+    let image = tokio::task::spawn_blocking(move || std::fs::read(path))
+        .await
+        .map_err(|_| FeatureExtractionError::LocalFilesystemError)? // blocking task failed
+        .map_err(|_| FeatureExtractionError::LocalFilesystemError)?; // read failed
+    let ctx = client::context_with_10_second_deadline();
+
+    client
+        .get_features(ctx, image)
+        .await
+        .map_err(|_| FeatureExtractionError::ConnectionError)?
+}
+
+/// Converts the response into a {:process_command_reply, %CommandReply{...}}
+/// message which gets sent back to the caller.
+pub fn command_reply_with_env<'a>(env: Env<'a>, r: CommandReply) -> Term<'a> {
+    (
+        process_command_reply(),
+        CommandReply_ {
+            stdout: binary_or_nil(env, r.stdout),
+            stderr: binary_or_nil(env, r.stderr),
+            status: r.status,
+        },
+    )
+        .encode(env)
+}
+
+/// Converts the response into a {:get_features_reply, {:ok, [0.1, ..., 0.1]}}
+/// message which gets sent back to the caller.
+pub fn get_features_reply_with_env<'a>(
+    env: Env<'a>,
+    r: Result<Vec<f32>, FeatureExtractionError>,
+) -> Term<'a> {
+    match r {
+        Ok(features) => (get_features_reply(), (ok(), features)).encode(env),
+        Err(e) => (get_features_reply(), (error(), format!("{e:?}"))).encode(env),
+    }
+}
diff --git a/priv/repo/migrations/20250109155442_create_image_vectors.exs b/priv/repo/migrations/20250109155442_create_image_vectors.exs
new file mode 100644
index 000000000..251b86b3b
--- /dev/null
+++ b/priv/repo/migrations/20250109155442_create_image_vectors.exs
@@ -0,0 +1,17 @@
+defmodule Philomena.Repo.Migrations.CreateImageVectors do
+  use Ecto.Migration
+
+  # Creates the image_vectors table: one feature array per (image, type) pair.
+  def change do
+    # NB: this is normalized, the float array is not divisible
+    create table(:image_vectors) do
+      # Rows are deleted together with the image they reference.
+      add :image_id, references(:images, on_delete: :delete_all), null: false
+      add :type, :string, null: false
+      add :features, {:array, :float}, null: false
+    end
+
+    # At most one row of each type per image.
+    create unique_index(:image_vectors, [:image_id, :type])
+  end
+end
diff --git a/priv/repo/seeds_development.exs b/priv/repo/seeds_development.exs
index 47bcd1e7e..0d2c56457 100644
--- a/priv/repo/seeds_development.exs
+++ b/priv/repo/seeds_development.exs
@@ -46,7 +46,7 @@ request_attributes = [
 IO.puts "---- Generating images"
 
 for image_def <- resources["remote_images"] do
-  file = Briefly.create!()
+  file = Briefly.create!(extname: ".png")
   now = DateTime.utc_now() |> DateTime.to_unix(:microsecond)
 
   IO.puts "Fetching #{image_def["url"]} ..."
diff --git a/priv/repo/structure.sql b/priv/repo/structure.sql index e449fbf49..9d3df6165 100644 --- a/priv/repo/structure.sql +++ b/priv/repo/structure.sql @@ -2,12 +2,13 @@ -- PostgreSQL database dump -- --- Dumped from database version 16.4 --- Dumped by pg_dump version 16.6 +-- Dumped from database version 17.2 +-- Dumped by pg_dump version 17.2 SET statement_timeout = 0; SET lock_timeout = 0; SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; SET client_encoding = 'UTF8'; SET standard_conforming_strings = on; SELECT pg_catalog.set_config('search_path', '', false); @@ -959,6 +960,37 @@ CREATE TABLE public.image_taggings ( ); +-- +-- Name: image_vectors; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.image_vectors ( + id bigint NOT NULL, + image_id bigint NOT NULL, + type character varying(255) NOT NULL, + features double precision[] NOT NULL +); + + +-- +-- Name: image_vectors_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.image_vectors_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: image_vectors_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.image_vectors_id_seq OWNED BY public.image_vectors.id; + + -- -- Name: image_votes; Type: TABLE; Schema: public; Owner: - -- @@ -2365,6 +2397,13 @@ ALTER TABLE ONLY public.image_features ALTER COLUMN id SET DEFAULT nextval('publ ALTER TABLE ONLY public.image_intensities ALTER COLUMN id SET DEFAULT nextval('public.image_intensities_id_seq'::regclass); +-- +-- Name: image_vectors id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.image_vectors ALTER COLUMN id SET DEFAULT nextval('public.image_vectors_id_seq'::regclass); + + -- -- Name: images id; Type: DEFAULT; Schema: public; Owner: - -- @@ -2727,6 +2766,14 @@ ALTER TABLE ONLY public.image_intensities ADD CONSTRAINT image_intensities_pkey PRIMARY KEY (id); +-- +-- Name: image_vectors 
image_vectors_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.image_vectors + ADD CONSTRAINT image_vectors_pkey PRIMARY KEY (id); + + -- -- Name: images images_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- @@ -3192,6 +3239,13 @@ CREATE UNIQUE INDEX image_tag_locks_image_id_tag_id_index ON public.image_tag_lo CREATE INDEX image_tag_locks_tag_id_index ON public.image_tag_locks USING btree (tag_id); +-- +-- Name: image_vectors_image_id_type_index; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX image_vectors_image_id_type_index ON public.image_vectors USING btree (image_id, type); + + -- -- Name: images_hidden_from_users_approved_index; Type: INDEX; Schema: public; Owner: - -- @@ -5381,6 +5435,14 @@ ALTER TABLE ONLY public.image_tag_locks ADD CONSTRAINT image_tag_locks_tag_id_fkey FOREIGN KEY (tag_id) REFERENCES public.tags(id) ON DELETE CASCADE; +-- +-- Name: image_vectors image_vectors_image_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.image_vectors + ADD CONSTRAINT image_vectors_image_id_fkey FOREIGN KEY (image_id) REFERENCES public.images(id) ON DELETE CASCADE; + + -- -- Name: moderation_logs moderation_logs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - -- @@ -5448,3 +5510,4 @@ INSERT INTO public."schema_migrations" (version) VALUES (20220321173359); INSERT INTO public."schema_migrations" (version) VALUES (20240723122759); INSERT INTO public."schema_migrations" (version) VALUES (20240728191353); INSERT INTO public."schema_migrations" (version) VALUES (20241216165826); +INSERT INTO public."schema_migrations" (version) VALUES (20250109155442);