diff --git a/modules/chat.py b/modules/chat.py
index 42c0d46d5a..459f099cbd 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -7,10 +7,12 @@
import re
import shutil
import time
+import mimetypes
from datetime import datetime
from functools import partial
from pathlib import Path
+import filetype
import gradio as gr
import yaml
from jinja2.ext import loopcontrols
@@ -25,7 +27,7 @@
convert_to_markdown,
make_thumbnail
)
-from modules.image_utils import open_image_safely
+from modules.image_utils import is_mime_type_vision_supported, open_image_safely
from modules.logging_colors import logger
from modules.text_generation import (
generate_reply,
@@ -239,7 +241,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
image_refs = ""
for attachment in metadata[user_key]["attachments"]:
- if attachment.get("type") == "image":
+ if is_mime_type_vision_supported(attachment.get("type")):
# Add image reference for multimodal models
image_refs += "<__media__>"
elif state.get('include_past_attachments', True):
@@ -280,7 +282,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
image_refs = ""
for attachment in metadata[user_key]["attachments"]:
- if attachment.get("type") == "image":
+ if is_mime_type_vision_supported(attachment.get("type")):
image_refs += "<__media__>"
else:
filename = attachment.get("name", "file")
@@ -590,51 +592,48 @@ def add_message_attachment(history, row_idx, file_path, is_user=True):
# Get file info using pathlib
path = Path(file_path)
filename = path.name
- file_extension = path.suffix.lower()
+
+ # Get MIME type from path
+ mime_type: str | None
+ mime_type, _ = mimetypes.guess_file_type(path)
+
+ # Get MIME type from file
+ if mime_type is None:
+ mime_type = filetype.guess_mime(path)
try:
- # Handle image files
- if file_extension in ['.jpg', '.jpeg', '.png', '.webp', '.bmp', '.gif']:
+ if is_mime_type_vision_supported(mime_type):
+ # Handle image files
# Convert image to base64
with open(path, 'rb') as f:
image_data = base64.b64encode(f.read()).decode('utf-8')
- # Determine MIME type from extension
- mime_type_map = {
- '.jpg': 'image/jpeg',
- '.jpeg': 'image/jpeg',
- '.png': 'image/png',
- '.webp': 'image/webp',
- '.bmp': 'image/bmp',
- '.gif': 'image/gif'
- }
- mime_type = mime_type_map.get(file_extension, 'image/jpeg')
-
# Format as data URL
data_url = f"data:{mime_type};base64,{image_data}"
# Generate unique image ID
- image_id = len([att for att in history['metadata'][key]["attachments"] if att.get("type") == "image"]) + 1
+ image_id = len([att for att in history['metadata'][key]["attachments"] if is_mime_type_vision_supported(att.get("type"))]) + 1
attachment = {
"name": filename,
- "type": "image",
+ "type": mime_type,
"image_data": data_url,
"image_id": image_id,
}
- elif file_extension == '.pdf':
+ elif mime_type == 'application/pdf':
# Process PDF file
content = extract_pdf_text(path)
attachment = {
"name": filename,
- "type": "application/pdf",
+ "type": mime_type,
"content": content,
}
- elif file_extension == '.docx':
+ elif mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
+ # Process .docx file
content = extract_docx_text(path)
attachment = {
"name": filename,
- "type": "application/docx",
+ "type": mime_type,
"content": content,
}
else:
@@ -644,7 +643,7 @@ def add_message_attachment(history, row_idx, file_path, is_user=True):
attachment = {
"name": filename,
- "type": "text/plain",
+ "type": mime_type or "text/plain",
"content": content,
}
@@ -858,7 +857,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
user_key = f"user_{i}"
if user_key in output['metadata'] and "attachments" in output['metadata'][user_key]:
for attachment in output['metadata'][user_key]["attachments"]:
- if attachment.get("type") == "image":
+ if is_mime_type_vision_supported(attachment.get("type")):
all_image_attachments.append(attachment)
# Add all collected image attachments to state for the generation
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 312b66ad07..8125147582 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -10,6 +10,7 @@
from PIL import Image, ImageOps
from modules import shared
+from modules.image_utils import is_mime_type_vision_supported
from modules.sane_markdown_lists import SaneListExtension
from modules.utils import get_available_chat_styles
@@ -462,12 +463,13 @@ def format_message_attachments(history, role, index):
attachments_html = '
'
for attachment in attachments:
name = html.escape(attachment["name"])
+ mime_type = attachment.get("type")
- if attachment.get("type") == "image":
+ if is_mime_type_vision_supported(mime_type):
image_data = attachment.get("image_data", "")
attachments_html += (
f'
'
- f'

'
+ f'

'
f'
{name}
'
f'
'
)
diff --git a/modules/image_utils.py b/modules/image_utils.py
index d2809fef36..5f90dc1119 100644
--- a/modules/image_utils.py
+++ b/modules/image_utils.py
@@ -97,7 +97,7 @@ def convert_image_attachments_to_pil(image_attachments: List[dict]) -> List[Imag
"""Convert webui image_attachments format to PIL Images."""
pil_images = []
for attachment in image_attachments:
- if attachment.get('type') == 'image' and 'image_data' in attachment:
+ if is_mime_type_vision_supported(attachment.get('type')) and 'image_data' in attachment:
try:
image = decode_base64_image(attachment['image_data'])
if image.mode != 'RGB':
@@ -116,3 +116,42 @@ def convert_openai_messages_to_images(messages: List[dict]) -> List[Image.Image]
_, images = process_message_content(message['content'])
all_images.extend(images)
return all_images
+
+
+def is_mime_type_vision_supported(mime_type: str | None) -> bool:
+    """Return True if an attachment ``type`` denotes an image usable for vision.
+
+    Accepts a MIME type string, or None when the type is unknown/missing (always
+    False). The bare value 'image' is also accepted: attachments created before
+    MIME types were stored were tagged with exactly that string.
+    """
+    return mime_type in {
+        'image',  # legacy tag written before MIME types were stored
+        'image/jpeg',
+        'image/png',
+        'image/webp',
+        'image/bmp',
+        'image/gif',
+        'image/tiff',
+        'image/avif',
+        # Uncommon Pillow-readable MIME types (related types share a line)
+        'image/x-dds', 'image/dds',
+        'image/x-eps',
+        'image/x-icns',
+        'image/x-icon', 'image/vnd.microsoft.icon',
+        'image/jp2', 'image/x-jp2-codestream', 'image/jpx',
+        'image/vnd.zbrush.pcx',
+        'image/x-portable-pixmap',
+        'image/qoi', 'image/x-qoi',
+        'image/x-sgi',
+        'image/x-tga',
+        'image/x-xbitmap',
+        'image/x-win-bitmap',
+        'image/fits',
+        'image/vnd.fpx', 'image/x-fpx',
+        'image/x-photo-cd',
+        'image/vnd.adobe.photoshop',
+        'image/x-sun-raster',
+        'image/emf', 'image/wmf',
+        'image/x-xpixmap',
+    }
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index dd74ed52e9..63b05e980f 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -6,6 +6,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
flash-linear-attention==0.4.*
html2text==2025.4.15
huggingface-hub==0.36.0
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 3475c161ac..1375acfd0a 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index 05077a32a6..67b4c5ce10 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index f591fa251e..cf86336ad1 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 6ff4753d4b..85dda581c2 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index bd0776daea..9375e4cf7d 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index 39a06cdbe0..291ec8ca7d 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index 3ff17704be..e664fabff0 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -6,6 +6,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
flash-linear-attention==0.4.*
html2text==2025.4.15
huggingface-hub==0.36.0
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index dc740dc86f..fe1fc16bdf 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 2f6bf51142..f6b5d08c16 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 209610ceb9..c657f61e57 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt
index 29a1f72b04..abf9ef73f0 100644
--- a/requirements/portable/requirements_amd_noavx2.txt
+++ b/requirements/portable/requirements_amd_noavx2.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 8812ac39f5..cf3c653710 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index c627cfd456..1d8d5a6936 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 5ca660adfc..3f6859971b 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index 5bb282eb18..8c9848e2bd 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index ecce4a2f7d..b07e2c885e 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index c9831985e5..6a05bef68f 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 4b3adddc3d..21c57e3c46 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index 8d6acdd78e..f800adbf16 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
+filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6