Skip to content

Commit fd96cd5

Browse files
authored
Verify images before adding to batch for embedding (#18885)
1 parent e0c1fea commit fd96cd5

File tree

1 file changed

+26
-7
lines changed

1 file changed

+26
-7
lines changed

frigate/embeddings/embeddings.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
"""SQLite-vec embeddings database."""
22

33
import datetime
4+
import io
45
import logging
56
import os
67
import threading
78
import time
89

910
from numpy import ndarray
11+
from PIL import Image
1012
from playhouse.shortcuts import model_to_dict
1113

1214
from frigate.comms.inter_process import InterProcessRequestor
@@ -199,27 +201,44 @@ def batch_embed_thumbnail(
199201
@param: upsert If embedding should be upserted into vec DB
200202
"""
201203
start = datetime.datetime.now().timestamp()
202-
ids = list(event_thumbs.keys())
203-
embeddings = self.vision_embedding(list(event_thumbs.values()))
204+
valid_ids = []
205+
valid_thumbs = []
206+
for eid, thumb in event_thumbs.items():
207+
try:
208+
img = Image.open(io.BytesIO(thumb))
209+
img.verify() # Will raise if corrupt
210+
valid_ids.append(eid)
211+
valid_thumbs.append(thumb)
212+
except Exception as e:
213+
logger.warning(
214+
f"Embeddings reindexing: Skipping corrupt thumbnail for event {eid}: {e}"
215+
)
216+
217+
if not valid_thumbs:
218+
logger.warning(
219+
"Embeddings reindexing: No valid thumbnails to embed in this batch."
220+
)
221+
return []
222+
223+
embeddings = self.vision_embedding(valid_thumbs)
204224

205225
if upsert:
206226
items = []
207-
208-
for i in range(len(ids)):
209-
items.append(ids[i])
227+
for i in range(len(valid_ids)):
228+
items.append(valid_ids[i])
210229
items.append(serialize(embeddings[i]))
211230
self.image_eps.update()
212231

213232
self.db.execute_sql(
214233
"""
215234
INSERT OR REPLACE INTO vec_thumbnails(id, thumbnail_embedding)
216235
VALUES {}
217-
""".format(", ".join(["(?, ?)"] * len(ids))),
236+
""".format(", ".join(["(?, ?)"] * len(valid_ids))),
218237
items,
219238
)
220239

221240
duration = datetime.datetime.now().timestamp() - start
222-
self.text_inference_speed.update(duration / len(ids))
241+
self.text_inference_speed.update(duration / len(valid_ids))
223242

224243
return embeddings
225244

0 commit comments

Comments
 (0)