|
1 | 1 | """SQLite-vec embeddings database.""" |
2 | 2 |
|
3 | 3 | import datetime |
| 4 | +import io |
4 | 5 | import logging |
5 | 6 | import os |
6 | 7 | import threading |
7 | 8 | import time |
8 | 9 |
|
9 | 10 | from numpy import ndarray |
| 11 | +from PIL import Image |
10 | 12 | from playhouse.shortcuts import model_to_dict |
11 | 13 |
|
12 | 14 | from frigate.comms.inter_process import InterProcessRequestor |
@@ -199,27 +201,44 @@ def batch_embed_thumbnail( |
199 | 201 | @param: upsert If embedding should be upserted into vec DB |
200 | 202 | """ |
201 | 203 | start = datetime.datetime.now().timestamp() |
202 | | - ids = list(event_thumbs.keys()) |
203 | | - embeddings = self.vision_embedding(list(event_thumbs.values())) |
| 204 | + valid_ids = [] |
| 205 | + valid_thumbs = [] |
| 206 | + for eid, thumb in event_thumbs.items(): |
| 207 | + try: |
| 208 | + img = Image.open(io.BytesIO(thumb)) |
| 209 | + img.verify() # Will raise if corrupt |
| 210 | + valid_ids.append(eid) |
| 211 | + valid_thumbs.append(thumb) |
| 212 | + except Exception as e: |
| 213 | + logger.warning( |
| 214 | + f"Embeddings reindexing: Skipping corrupt thumbnail for event {eid}: {e}" |
| 215 | + ) |
| 216 | + |
| 217 | + if not valid_thumbs: |
| 218 | + logger.warning( |
| 219 | + "Embeddings reindexing: No valid thumbnails to embed in this batch." |
| 220 | + ) |
| 221 | + return [] |
| 222 | + |
| 223 | + embeddings = self.vision_embedding(valid_thumbs) |
204 | 224 |
|
205 | 225 | if upsert: |
206 | 226 | items = [] |
207 | | - |
208 | | - for i in range(len(ids)): |
209 | | - items.append(ids[i]) |
| 227 | + for i in range(len(valid_ids)): |
| 228 | + items.append(valid_ids[i]) |
210 | 229 | items.append(serialize(embeddings[i])) |
211 | 230 | self.image_eps.update() |
212 | 231 |
|
213 | 232 | self.db.execute_sql( |
214 | 233 | """ |
215 | 234 | INSERT OR REPLACE INTO vec_thumbnails(id, thumbnail_embedding) |
216 | 235 | VALUES {} |
217 | | - """.format(", ".join(["(?, ?)"] * len(ids))), |
| 236 | + """.format(", ".join(["(?, ?)"] * len(valid_ids))), |
218 | 237 | items, |
219 | 238 | ) |
220 | 239 |
|
221 | 240 | duration = datetime.datetime.now().timestamp() - start |
222 | | - self.text_inference_speed.update(duration / len(ids)) |
| 241 | + self.text_inference_speed.update(duration / len(valid_ids)) |
223 | 242 |
|
224 | 243 | return embeddings |
225 | 244 |
|
|
0 commit comments