diff --git a/.github/workflows/docker-image-build.yml b/.github/workflows/docker-image-build.yml index 39a55868..76d95a5f 100644 --- a/.github/workflows/docker-image-build.yml +++ b/.github/workflows/docker-image-build.yml @@ -1,6 +1,7 @@ name: Build & Push Multi-Arch Docker Image on: + workflow_dispatch: push: branches: - master @@ -20,10 +21,10 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - - name: Set up JDK 11 + - name: Set up JDK 25 uses: actions/setup-java@v4 with: - java-version: '11' + java-version: '25' distribution: 'temurin' cache: maven - name: Build with Maven @@ -62,4 +63,4 @@ jobs: platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index d0a00f1f..b6d28709 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -12,10 +12,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up JDK 11 + - name: Set up JDK 25 uses: actions/setup-java@v4 with: - java-version: '11' + java-version: '25' distribution: 'temurin' cache: maven - name: Build with Maven @@ -28,10 +28,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up JDK 11 + - name: Set up JDK 25 uses: actions/setup-java@v4 with: - java-version: '11' + java-version: '25' distribution: 'temurin' cache: maven - name: Run Maven tests diff --git a/.gitignore b/.gitignore index ea7ce18f..29321624 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,8 @@ test_output .classpath *.bak .vscode/ +.idea/ .project +CLAUDE.local.md +/docker/compose/.env +/docker/compose/certs/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..1400289c --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,83 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this 
repository. + +## Overview + +TIMAAT is a Jakarta EE web application for collaborative video annotation, packaged as a WAR deployed to Tomcat 10. It is the successor to FIPOP (the persistence unit and database are still named `FIPOP`). Backend is Java + Jersey; frontend is server-served HTML + RequireJS modules in `src/main/webapp/js`. + +## Build & Run + +- `mvn package` — builds `target/TIMAAT.war`. Deploy by copying into Tomcat's `webapps/` directory; app is then reachable at `http://localhost:8080/TIMAAT`. +- `mvn test` — runs the JUnit 5 suite. Surefire is configured with `-javaagent:.../mockito-core.jar` (see pom.xml:67) because Mockito 5 needs the agent attached on Java 21+. Don't drop that argLine. +- Run a single test: `mvn test -Dtest=FfmpegAudioEngineTest` or `mvn test -Dtest=FfmpegAudioEngineTest#methodName`. +- Java toolchain: `pom.xml` sets `maven.compiler.release=25`, and the GitHub Actions workflows (`.github/workflows/maven.yml`, `.github/workflows/docker-image-build.yml`) set up Temurin JDK 25 to match. Local builds need JDK 25 as well. +- Annotation processing: EclipseLink's `CanonicalModelProcessor` runs at compile time and generates JPA static metamodel classes into `target/generated-sources/annotations`. + +## Runtime configuration + +The app reads `~/.timaat/timaat.properties` (Linux/macOS) or `%HOMEDRIVE%%HOMEPATH%\.timaat\timaat.properties` (Windows). Template lives at `src/main/resources/timaat-default.properties`. Required keys: `database.url`, `database.user`, `database.password`, `storage.location` (filesystem root for media), `app.ffmpeg.location`, `app.imagemagick.location`. Async task pool sizing: `app.task.coreParallelCount`, `app.task.maxParallelCount`, `app.task.queueSize`. + +For Docker deployments these are surfaced as env vars (`DATABASE_USER`, `DATABASE_HOST`, `APP_TASK_MAXPARALLELCOUNT`, …) — see `docs/docker.md`. `docker/timaat-entrypoint.sh` writes the properties file from env on container start. 
+ +## Database + +MySQL/MariaDB schema `FIPOP`, collation `utf8mb4_general_ci`. + +- Fresh install: load `src/main/resources/database/fipop.sql`. +- Upgrade: apply `src/main/resources/database/db_update.sql` (it is idempotent across versioned blocks). Schema version lives in the `db_version` table; bump it for every schema change and document the change in `docs/database.md`. +- JPA persistence unit `FIPOP-JPA` is declared in `src/main/resources/META-INF/persistence.xml`. Every entity class in `de.bitgilde.TIMAAT.model.FIPOP` must be listed there — EclipseLink does not auto-discover. When adding an entity, add a `<class>` line. + +## Architecture + +### Request lifecycle + +`de.bitgilde.TIMAAT.TIMAATApp` (`@ApplicationPath("timaatapp")`) is the Jersey `Application`. It explicitly enumerates resource and filter classes in `addRestResourceClasses` — new endpoints must be added there or they will 404. Filters in `rest/filter/` (`AuthenticationFilter`, `CORSFilter`, `RangeResponseFilter`) wrap every request; auth is JWT (jjwt) with Argon2 password hashing. + +REST endpoints live in `de.bitgilde.TIMAAT.rest.endpoint.Endpoint*` (one class per domain area: Medium, Annotation, Analysis, Actor, Music, …). They delegate to storage components rather than holding business logic themselves. + +### Dependency injection + +Jersey HK2 is the DI container. All wiring lives in **one** place: `de.bitgilde.TIMAAT.di.binder.TIMAATBinder`. Singletons (file storages, engines, task service, the entity-storage layer) are bound there. When adding a new service or storage, register it in `TIMAATBinder.configure()` — endpoints get instances via `@Inject` only because of bindings here. Note the multi-contract bindings (e.g. `bind(DbTaskStorage.class).to(DbTaskStorage.class).to(TaskStorage.class).to(TaskStateUpdater.class)`) used so one implementation satisfies multiple registry lookups. 
+ +`TIMAATBinder` also constructs the `EntityManagerFactory` (it has to run before the persistence layer is touched) and exposes it as a binding. + +### Storage layering + +Three parallel storage concepts that are easy to confuse: + +- `storage/entity/**` — DB-backed entity storage classes (`MediumStorage`, `AnnotationStorage`, `TranscriptionStorage`, …). These are the JPA access layer for the FIPOP entities. +- `storage/file/**` — filesystem storage for binary blobs (`VideoFileStorage`, `AudioFileStorage`, `ImageFileStorage`, `TranscriptionFileStorage`, `TemporaryFileStorage`). They write under `storage.location` with the layout documented in `docs/fs-storage.md` (per-medium subdirectories keyed by medium ID). When that layout changes, add a migration script under `src/main/resources/scripts/fs-migration/`. +- `db/` — low-level JDBC helpers (`DbAccessComponent`) used when JPA is too heavy. + +The corresponding domain models are in `model/FIPOP/` (JPA entities, generated/maintained against the schema) and `rest/model/` (DTOs used over the wire). + +### Async task framework (`service/task/`) + +Long-running work (audio analysis, transcription preparation) goes through this framework rather than being run inline in a REST handler. + +- `api/Task` + `TaskType` + concrete `Task` subclasses describe a unit of work. +- `storage/TaskStorageRegistry` and `storage/TaskStateUpdaterRegistry` route a task to the right persistence handler by its `TaskType`. To add a new task type: define `XxxTask`, register its `TaskStorage` and `TaskStateUpdater` bindings in `TIMAATBinder`, and register a corresponding `TaskExecutor` in `execution/TaskExecutorFactory`. +- `TaskService` is the entry point — it persists the task, then hands it to `TaskExecutorService`. On startup it resumes any tasks left in a non-terminal state. +- `TaskExecutorService` owns a thread pool sized by the `app.task.*` properties. 
+ +### Transcription (`service/transcription/`) + +`TranscriptionService` orchestrates speech-to-text via the external `studio.nko-dev:speech-to-text-service-client-uni-erfurt` client. It plugs into the task framework as a `TaskStateUpdater` (see binding in `TIMAATBinder.java:87`). Transcription file output goes through `TranscriptionFileStorage`; DB rows are persisted via `TranscriptionStorage`. + +### Media processing (`processing/`) + +`processing/audio/FfmpegAudioEngine` and `processing/video/FfmpegVideoEngine` shell out to `ffmpeg` (path from `app.ffmpeg.location`). Audio analysis produces waveform + frequency files written via the binary readers/writers in `processing/audio/io/` — these are the only components with meaningful unit-test coverage today (`src/test/java/de/bitgilde/TIMAAT/audio/io/`). + +### Realtime channels + +- `notification/NotificationWebSocket` — WebSocket endpoint for user-targeted push notifications; subscriptions tracked in `UserSubscriptions`. +- `sse/EntityUpdateEventService` + `rest/endpoint/EndpointEntityUpdateEvents` — Server-Sent Events stream so the UI can react to entity changes from other sessions. + +### Publication output (`publication/`) + +Generates standalone publishable bundles from annotations using template files in `src/main/resources/*.template` (registered to the WAR by the war-plugin config in `pom.xml`). `PublicationAuthenticationFilter` + `PublicationServlet` serve the protected publication endpoints; they are added to the Jersey resource set alongside the REST endpoints. + +## Frontend + +`src/main/webapp/` is plain HTML/CSS/JS, no build step. JS is organized as RequireJS modules under `js/` with vendor libs vendored under `third-party/` (jQuery, Bootstrap, datatables, leaflet, wavesurferjs, dropzone, summernote, select2, …). The frontend talks to the backend over the `/timaatapp/*` REST API and the SSE + WebSocket channels above. 
\ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 1f71c965..c88b30f1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM tomcat:10.1.47-jre11-temurin-noble +FROM tomcat:10.1.47-jre25-temurin-noble LABEL org.opencontainers.image.title="TIMAAT Web Application" LABEL org.opencontainers.image.description="Java-based web application running on Tomcat, providing TIMAAT services." LABEL org.opencontainers.image.version="${TIMAAT_VERSION}" diff --git a/docker/compose/.env.example b/docker/compose/.env.example new file mode 100644 index 00000000..cfd8e788 --- /dev/null +++ b/docker/compose/.env.example @@ -0,0 +1,18 @@ +# Copy this file to ".env" (same directory as docker-compose-prod.yaml) and adjust. +# docker compose interpolates these values when constructing the stack. + +# Public hostname the reverse proxy serves (used in the Traefik router rule). +TIMAAT_DOMAIN=timaat.example.org + +# Host path of the Docker socket Traefik reads container labels from. +# Native Linux daemon: leave unset (defaults to /var/run/docker.sock). +# Docker Desktop (macOS/Windows): point to the per-user socket, e.g. +# DOCKER_SOCKET=/Users/<username>/.docker/run/docker.sock +DOCKER_SOCKET=/var/run/docker.sock + +# Host directory that contains the externally provided TLS certificate + key. +# It is bind-mounted read-only into the reverse-proxy container at /certs. 
+# Expected files inside this directory (rename in traefik/dynamic/tls.yaml if different): +# - timaat.crt (full certificate chain, PEM) +# - timaat.key (private key, PEM) +TIMAAT_TLS_CERT_DIR=/etc/ssl/timaat \ No newline at end of file diff --git a/docker/compose/docker-compose-prod.yaml b/docker/compose/docker-compose-prod.yaml new file mode 100644 index 00000000..2102f482 --- /dev/null +++ b/docker/compose/docker-compose-prod.yaml @@ -0,0 +1,104 @@ +name: timaat +services: + db: + image: mariadb:11.4 + ports: + - "3306:3306" + networks: + - timaat-backend + command: --lower_case_table_names=1 + environment: + MYSQL_ROOT_PASSWORD_FILE: /run/secrets/database-root-password + volumes: + - db-data:/var/lib/mysql + secrets: + - database-root-password + healthcheck: + test: [ "CMD-SHELL", "mariadb-admin ping -u root --password=$(cat /run/secrets/database-root-password)" ] + interval: 5s + timeout: 3s + retries: 10 + start_period: 10s + webservice: + image: christophguentherunierfurt/timaat:latest + pull_policy: always + networks: + - timaat-backend + # No host port binding anymore - the webservice is only reachable through the + # Traefik reverse proxy on the internal timaat-backend network. + expose: + - "8080" + environment: + DATABASE_USER: root + DATABASE_PASSWORD_FILE: /run/secrets/database-root-password + DATABASE_HOST: db + DATABASE_PORT: 3306 + volumes: + - file-storage:/var/lib/timaat/storage + secrets: + - database-root-password + depends_on: + db: + condition: service_healthy + labels: + - "traefik.enable=true" + # Route incoming HTTPS traffic for the configured host to this service. + - "traefik.http.routers.timaat.rule=Host(`${TIMAAT_DOMAIN}`)" + - "traefik.http.routers.timaat.entrypoints=websecure" + - "traefik.http.routers.timaat.tls=true" + # Forward to the container's internal HTTP port 8080. 
+ - "traefik.http.services.timaat.loadbalancer.server.port=8080" + - "traefik.http.routers.timaat.middlewares=owasp-secure-headers@file" + # Redirect requests to the host root ("/") to the application context + # path ("/TIMAAT/"). The more specific Path(`/`) rule wins over the bare + # Host rule above by Traefik's automatic priority (longer rule = higher). + - "traefik.http.routers.timaat-root.rule=Host(`${TIMAAT_DOMAIN}`) && Path(`/`)" + - "traefik.http.routers.timaat-root.entrypoints=websecure" + - "traefik.http.routers.timaat-root.tls=true" + - "traefik.http.routers.timaat-root.service=timaat" + - "traefik.http.routers.timaat-root.middlewares=redirect-root-to-timaat@file" + reverse-proxy: + image: traefik:v3.7.1 + networks: + - timaat-backend + ports: + # Only HTTPS is exposed to the host; the proxy listens on 443 internally + # and is published on the host's port 443. + - "443:443" + command: + # Do NOT expose the API/dashboard. + - "--api.dashboard=false" + - "--api.insecure=false" + # Single HTTPS entrypoint on port 443. + - "--entrypoints.websecure.address=:443" + # Docker provider: read router/service/middleware definitions from labels, + # but only for containers that explicitly opt in (traefik.enable=true). + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--providers.docker.network=timaat-backend" + # File provider: TLS certificate material is configured via a mounted + # dynamic configuration file. + - "--providers.file.directory=/etc/traefik/dynamic" + - "--providers.file.watch=true" + # Keep logs lean and do not advertise version information anywhere. + - "--log.level=INFO" + - "--accesslog=false" + volumes: + - "${DOCKER_SOCKET:-/var/run/docker.sock}:/var/run/docker.sock:ro" + # Dynamic TLS configuration (references the certificate paths inside the container). 
+ - ./traefik/dynamic:/etc/traefik/dynamic:ro + # Externally provided certificate + key, mounted from a host path supplied + # via the TIMAAT_TLS_CERT_DIR variable (see .env.example). + - "${TIMAAT_TLS_CERT_DIR}:/certs:ro" + depends_on: + - webservice +networks: + timaat-backend: + driver: bridge + attachable: false +volumes: + db-data: + file-storage: +secrets: + database-root-password: + file: passwords/database-root-password.txt diff --git a/docker/compose/traefik/dynamic/middlewares.yaml b/docker/compose/traefik/dynamic/middlewares.yaml new file mode 100644 index 00000000..ff7603f0 --- /dev/null +++ b/docker/compose/traefik/dynamic/middlewares.yaml @@ -0,0 +1,37 @@ +# Traefik dynamic configuration: HTTP middlewares. +# +# OWASP recommended secure response headers. +# https://github.com/OWASP/www-project-secure-headers/blob/master/ci/headers_add.json +# +# Referenced from the webservice router as "owasp-secure-headers@file". +http: + middlewares: + owasp-secure-headers: + headers: + customResponseHeaders: + Cache-Control: "no-store, max-age=0" + Clear-Site-Data: "\"cache\",\"cookies\",\"storage\"" + # 'unsafe-inline' on script-src/style-src is required because the app + # ships inline