|
17 | 17 | get_reference_by_file_path, |
18 | 18 | get_reference_tags, |
19 | 19 | get_or_create_reference, |
| 20 | + list_references_by_asset_id, |
20 | 21 | reference_exists, |
21 | 22 | remove_missing_tag_for_asset_id, |
22 | 23 | set_reference_metadata, |
| 24 | + set_reference_system_metadata, |
23 | 25 | set_reference_tags, |
24 | 26 | update_asset_hash_and_mime, |
25 | 27 | upsert_asset, |
|
29 | 31 | from app.assets.helpers import get_utc_now, normalize_tags |
30 | 32 | from app.assets.services.bulk_ingest import batch_insert_seed_assets |
31 | 33 | from app.assets.services.file_utils import get_size_and_mtime_ns |
| 34 | +from app.assets.services.image_dimensions import extract_image_dimensions |
32 | 35 | from app.assets.services.path_utils import ( |
33 | 36 | compute_relative_filename, |
34 | 37 | get_name_and_tags_from_asset_path, |
@@ -118,6 +121,14 @@ def _ingest_file_from_path( |
118 | 121 | user_metadata=user_metadata, |
119 | 122 | ) |
120 | 123 |
|
| 124 | + _maybe_store_image_dimensions( |
| 125 | + session, |
| 126 | + reference_id=reference_id, |
| 127 | + file_path=locator, |
| 128 | + mime_type=mime_type, |
| 129 | + current_system_metadata=ref.system_metadata, |
| 130 | + ) |
| 131 | + |
121 | 132 | try: |
122 | 133 | remove_missing_tag_for_asset_id(session, asset_id=asset.id) |
123 | 134 | except Exception: |
@@ -288,6 +299,13 @@ def _register_existing_asset( |
288 | 299 | user_metadata=new_meta, |
289 | 300 | ) |
290 | 301 |
|
| 302 | + _backfill_image_dimensions_from_siblings( |
| 303 | + session, |
| 304 | + asset_id=asset.id, |
| 305 | + new_reference_id=ref.id, |
| 306 | + current_system_metadata=ref.system_metadata, |
| 307 | + ) |
| 308 | + |
291 | 309 | if tags is not None: |
292 | 310 | set_reference_tags( |
293 | 311 | session, |
@@ -334,6 +352,87 @@ def _update_metadata_with_filename( |
334 | 352 | ) |
335 | 353 |
|
336 | 354 |
|
# Names of the system_metadata keys that together describe an image
# reference's dimensions.
# NOTE(review): the helpers defined directly below spell these keys out
# literally instead of using this tuple — confirm it is consumed elsewhere.
_IMAGE_DIMENSION_KEYS = ("kind", "width", "height")
| 356 | + |
| 357 | + |
| 358 | +def _maybe_store_image_dimensions( |
| 359 | + session: Session, |
| 360 | + reference_id: str, |
| 361 | + file_path: str, |
| 362 | + mime_type: str | None, |
| 363 | + current_system_metadata: dict | None, |
| 364 | +) -> None: |
| 365 | + """Populate ``kind``/``width``/``height`` on system_metadata for image refs. |
| 366 | +
|
| 367 | + Non-image MIME types are a no-op. Pre-existing keys (e.g. enricher-written |
| 368 | + safetensors metadata, download provenance) are preserved by merge. |
| 369 | + """ |
| 370 | + if not mime_type or not mime_type.startswith("image/"): |
| 371 | + return |
| 372 | + |
| 373 | + dims = extract_image_dimensions(file_path, mime_type=mime_type) |
| 374 | + if not dims: |
| 375 | + return |
| 376 | + |
| 377 | + current = current_system_metadata or {} |
| 378 | + merged = dict(current) |
| 379 | + merged.update(dims) |
| 380 | + if merged != current: |
| 381 | + set_reference_system_metadata( |
| 382 | + session, |
| 383 | + reference_id=reference_id, |
| 384 | + system_metadata=merged, |
| 385 | + ) |
| 386 | + |
| 387 | + |
| 388 | +def _backfill_image_dimensions_from_siblings( |
| 389 | + session: Session, |
| 390 | + asset_id: str, |
| 391 | + new_reference_id: str, |
| 392 | + current_system_metadata: dict | None, |
| 393 | +) -> None: |
| 394 | + """Copy image dimension keys from any sibling reference of the same asset. |
| 395 | +
|
| 396 | + The from-hash path doesn't read the file bytes, so dimensions can't be |
| 397 | + extracted there directly. When another reference of the same asset already |
| 398 | + carries image dimensions, copy them onto the new reference so consumers |
| 399 | + see consistent metadata regardless of how the asset was registered. |
| 400 | +
|
| 401 | + Best-effort: missing siblings, non-image siblings, or absent dimension |
| 402 | + keys leave the target reference unchanged. |
| 403 | + """ |
| 404 | + current = current_system_metadata or {} |
| 405 | + if current.get("kind") == "image" and "width" in current and "height" in current: |
| 406 | + return |
| 407 | + |
| 408 | + for sibling in list_references_by_asset_id(session, asset_id): |
| 409 | + if sibling.id == new_reference_id: |
| 410 | + continue |
| 411 | + meta = sibling.system_metadata or {} |
| 412 | + if meta.get("kind") != "image": |
| 413 | + continue |
| 414 | + width = meta.get("width") |
| 415 | + height = meta.get("height") |
| 416 | + if ( |
| 417 | + type(width) is not int |
| 418 | + or type(height) is not int |
| 419 | + or width <= 0 |
| 420 | + or height <= 0 |
| 421 | + ): |
| 422 | + continue |
| 423 | + merged = dict(current) |
| 424 | + merged["kind"] = "image" |
| 425 | + merged["width"] = width |
| 426 | + merged["height"] = height |
| 427 | + if merged != current: |
| 428 | + set_reference_system_metadata( |
| 429 | + session, |
| 430 | + reference_id=new_reference_id, |
| 431 | + system_metadata=merged, |
| 432 | + ) |
| 433 | + return |
| 434 | + |
| 435 | + |
337 | 436 | def _sanitize_filename(name: str | None, fallback: str) -> str: |
338 | 437 | n = os.path.basename((name or "").strip() or fallback) |
339 | 438 | return n if n else fallback |
|
0 commit comments