From c506523211ea5aa13463f24e1338fd6a507e1f09 Mon Sep 17 00:00:00 2001 From: tt5416 <1776822920@qq.com> Date: Sat, 21 Mar 2026 21:25:47 +0800 Subject: [PATCH 1/2] Add diffusion demo app for photo-to-style generation --- diffusion/photo2style/app.py | 506 +++++++++++++++++++++++++++++++++++ 1 file changed, 506 insertions(+) create mode 100644 diffusion/photo2style/app.py diff --git a/diffusion/photo2style/app.py b/diffusion/photo2style/app.py new file mode 100644 index 0000000..883a390 --- /dev/null +++ b/diffusion/photo2style/app.py @@ -0,0 +1,506 @@ +# -*- coding: utf-8 -*- +""" +Photo2Style Demo (MindSpore 2.7.0 + MindNLP 0.5.1) +""" + +# ---- Compatibility shim (MindTorch vs HF Diffusers) ---- +try: + import mindtorch.autograd.function as _mt_func + if not hasattr(_mt_func, "FunctionCtx"): + class FunctionCtx: + pass + _mt_func.FunctionCtx = FunctionCtx +except Exception as _e: + print(f"[WARN] mindtorch FunctionCtx shim skipped: {_e}") +# --------------------------------------------------------- + +import os +import sys +import traceback +from functools import lru_cache +from typing import Dict, Tuple + +import numpy as np +from PIL import Image, ImageDraw, ImageEnhance, ImageFilter, ImageOps, ImageChops + +import mindspore as ms +import mindnlp # IMPORTANT: import mindnlp BEFORE diffusers (it patches HF stack) +from diffusers import DDIMScheduler, StableDiffusionImg2ImgPipeline +import gradio as gr + + +# ----------------------------- +# 0. 
# Version and device (Ascend-only)
# -----------------------------
EXPECTED_MS = "2.7.0"
EXPECTED_MNLP = "0.5.1"
EXPECTED_PY_MIN = (3, 10)
EXPECTED_PY_MAX = (3, 12)
MS_DTYPE = ms.float16

# Key point: Diffusers' device_map only accepts "cuda"/"balanced".
# In the MindNLP + Ascend setup, use "cuda" so that MindNLP takes over and maps it to the NPU.
DEVICE_MAP_STRATEGY = "cuda"


def _version_prefix(v: str) -> str:
    """Return at most the first three dot-separated components of ``v``."""
    return ".".join(str(v).split(".")[:3])


def _set_context() -> None:
    """Select PyNative mode and bind MindSpore to the Ascend device.

    The device index is read from the ``DEVICE_ID`` environment variable
    (default ``"0"``).

    Raises:
        ValueError: if ``DEVICE_ID`` is set to something that is not an integer.
    """
    raw_device_id = os.getenv("DEVICE_ID", "0")
    try:
        device_id = int(raw_device_id)
    except ValueError as exc:
        # This runs at import time; name the offending variable instead of
        # surfacing a bare "invalid literal for int()" message.
        raise ValueError(
            f"DEVICE_ID must be an integer, got {raw_device_id!r}"
        ) from exc

    ms.set_context(mode=ms.PYNATIVE_MODE)
    ms.set_device("Ascend", device_id)


def _check_versions() -> None:
    """Verify the runtime: hard-fail off Ascend, warn on version mismatches.

    Raises:
        RuntimeError: if MindSpore's ``device_target`` is not ``"Ascend"``.
    """
    device_target = ms.get_context("device_target")
    if device_target != "Ascend":
        raise RuntimeError(f"Ascend-only demo, but device_target={device_target}")

    # Comparing the 3-tuple against 2-tuples gives ">= 3.10 and < 3.12".
    py_v = sys.version_info[:3]
    if not (EXPECTED_PY_MIN <= py_v < EXPECTED_PY_MAX):
        print(
            f"[WARN] Python version is {py_v[0]}.{py_v[1]}.{py_v[2]}, "
            f"recommended range is >= {EXPECTED_PY_MIN[0]}.{EXPECTED_PY_MIN[1]} and < {EXPECTED_PY_MAX[0]}.{EXPECTED_PY_MAX[1]}."
        )

    # Fall back to "unknown" so a missing __version__ does not print an empty value.
    ms_v = _version_prefix(getattr(ms, "__version__", "")) or "unknown"
    mnlp_v = _version_prefix(getattr(mindnlp, "__version__", "")) or "unknown"

    if ms_v != EXPECTED_MS:
        print(f"[WARN] MindSpore version is {ms_v}, expected {EXPECTED_MS}.")
    if mnlp_v != EXPECTED_MNLP:
        print(f"[WARN] MindNLP version is {mnlp_v}, expected {EXPECTED_MNLP}.")


def _smoke_test_ascend() -> None:
    """Run one small float16 matmul to confirm the device can execute kernels."""
    a = ms.Tensor(np.random.randn(1024, 1024).astype(np.float16))
    b = ms.Tensor(np.random.randn(1024, 1024).astype(np.float16))
    product = ms.ops.matmul(a, b)
    # Copy the result back to the host so the kernel has really finished
    # before success is reported.
    # NOTE(review): PyNative dispatch is presumably asynchronous by default,
    # in which case an unfetched result could hide a device error - confirm.
    product.asnumpy()
    print("[OK] Ascend smoke test done.")


_set_context()
_check_versions()
_smoke_test_ascend()


# -----------------------------
# 1.
# Style presets (only the more deliverable styles are kept)
# -----------------------------
# Maps the style name shown in the UI dropdown to its generation settings.
# Keys of each preset, as they are consumed by generate():
#   model_id              - passed to load_pipe()
#   prompt / negative     - positive and negative prompt for the img2img pass
#   identity_strength_cap - upper bound applied to `strength` when
#                           preserve_identity is enabled
#   default_strength      - not read anywhere in the visible code
#   style_face_blend, line_preserve, detail_preserve, face_cartoon_boost
#                         - forwarded to _make_identity_locked_face_patch()
#   global_cartoon_boost  - amount given to _cartoon_postprocess() on the
#                           whole image (cartoon style only)
STYLE_PRESETS: Dict[str, Dict[str, object]] = {
    "吉卜力（Ghibli）": {
        "model_id": "nitrosocke/Ghibli-Diffusion",
        "prompt": (
            "portrait of the same exact person, same identity, same facial proportions, same jawline, same hairstyle, "
            "ghibli style, studio ghibli anime film still, hand-painted anime illustration, clean lineart, soft cel shading, "
            "natural expression, upper body, masterpiece"
        ),
        "negative": (
            "different person, changed face, aged face, chubby face, child face, huge anime eyes, lowres, blurry, "
            "bad face, deformed face, disfigured, mutated, cross-eyed, extra eyes, bad anatomy, watermark, text, logo"
        ),
        "identity_strength_cap": 0.48,
        "default_strength": 0.38,
        "style_face_blend": 0.18,
        "line_preserve": 0.22,
        "detail_preserve": 0.28,
    },
    "卡通插画（Cartoon）": {
        "model_id": "lavaman131/cartoonify",
        "prompt": (
            "portrait of the same exact person, same identity, same facial proportions, same jawline, same hairstyle, "
            "disney pixar style, polished cartoon illustration, animated feature film character portrait, clean cartoon lineart, "
            "simplified facial planes, soft cel shading, readable silhouette, stylized but recognizable face, upper body, masterpiece"
        ),
        "negative": (
            "different person, changed face, exaggerated face, huge eyes, tiny chin, malformed mouth, over-smoothed face, waxy skin, lowres, blurry, "
            "deformed, bad anatomy, watermark, text, logo"
        ),
        "identity_strength_cap": 0.56,
        "default_strength": 0.50,
        "style_face_blend": 0.32,
        "line_preserve": 0.24,
        "detail_preserve": 0.28,
        # The two *_cartoon_boost keys exist only for this style; generate()
        # reads them with .get(..., 0.0).
        "global_cartoon_boost": 0.34,
        "face_cartoon_boost": 0.26,
    },
}
# Style preselected in the UI dropdown; must be a key of STYLE_PRESETS.
DEFAULT_STYLE = "吉卜力（Ghibli）"


# -----------------------------
# 2.
# Pipeline cache (reused per model id)
# -----------------------------
@lru_cache(maxsize=2)
def load_pipe(model_id: str) -> StableDiffusionImg2ImgPipeline:
    """Load the img2img pipeline for ``model_id`` and keep it cached.

    The scheduler is replaced by a DDIM scheduler built from the pipeline's
    own scheduler config. Attention slicing, progress-bar suppression and
    disabling the safety checker are best-effort: a failure is reported as a
    warning and loading continues.

    Args:
        model_id: model identifier passed to ``from_pretrained``.

    Returns:
        The configured pipeline. At most two pipelines are kept by the cache.
    """
    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
        model_id,
        ms_dtype=MS_DTYPE,
        device_map=DEVICE_MAP_STRATEGY,
    )
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

    # The steps below are optional; keep them non-fatal but say what was
    # skipped instead of swallowing the exception silently.
    try:
        pipe.enable_attention_slicing()
    except Exception as exc:
        print(f"[WARN] enable_attention_slicing skipped: {exc}", flush=True)
    try:
        pipe.set_progress_bar_config(disable=True)
    except Exception as exc:
        print(f"[WARN] set_progress_bar_config skipped: {exc}", flush=True)
    try:
        # NOTE: this turns off the pipeline's output safety checker.
        pipe.safety_checker = None
        pipe.requires_safety_checker = False
    except Exception as exc:
        print(f"[WARN] disabling safety_checker skipped: {exc}", flush=True)

    print(f"[OK] pipeline loaded: {model_id}", flush=True)
    return pipe


# -----------------------------
# 3. Image preprocessing and identity enhancement
# -----------------------------
def _prep_image(img: Image.Image, size: int = 512) -> Image.Image:
    """Normalise an uploaded photo into a ``size`` x ``size`` RGB square.

    Applies the EXIF orientation, converts to RGB, sharpens (1.15) and raises
    contrast (1.04) slightly, then crops and resizes with LANCZOS.
    """
    img = ImageOps.exif_transpose(img).convert("RGB")
    img = ImageEnhance.Sharpness(img).enhance(1.15)
    img = ImageEnhance.Contrast(img).enhance(1.04)
    return ImageOps.fit(
        img,
        (size, size),
        method=Image.Resampling.LANCZOS,
        centering=(0.5, 0.28),  # shifted upwards a little to favour the face area
    )


def _estimate_face_box(size: int) -> Tuple[int, int, int, int]:
    """Return a fixed, heuristic face box ``(x0, y0, x1, y1)`` for a square image.

    No detection is performed: the box is a constant fraction of ``size``.
    It is deliberately tight (forehead, eyes, nose, mouth and a little chin)
    so that clothing, shoulders and background are mostly excluded and the
    blended face region is not over-softened.
    """
    x0 = int(size * 0.31)
    y0 = int(size * 0.11)
    x1 = int(size * 0.69)
    y1 = int(size * 0.50)
    return x0, y0, x1, y1


def _make_soft_face_mask(width: int, height: int, blur_radius: int) -> Image.Image:
    """Build a feathered ``L``-mode mask for pasting the face patch.

    A rounded rectangle (value 220, inset 8%) is overlaid with a fully opaque
    ellipse (value 255, inset 16% / 15%) and the result is Gaussian-blurred
    with ``blur_radius``.
    """
    mask = Image.new("L", (width, height), 0)
    draw = ImageDraw.Draw(mask)
    pad_w = int(width * 0.08)
    pad_h = int(height * 0.08)
    inner = (pad_w, pad_h, width - pad_w, height - pad_h)
    draw.rounded_rectangle(inner, radius=max(8, min(width, height) // 9), fill=220)
    core_pad_w = int(width * 0.16)
    core_pad_h = int(height * 0.15)
    core = (core_pad_w, core_pad_h, width - core_pad_w, height - core_pad_h)
    draw.ellipse(core, fill=255)
    return mask.filter(ImageFilter.GaussianBlur(radius=blur_radius))


def _to_pil_image(obj) -> Image.Image:
    """Convert a pipeline result (PIL image, MindSpore tensor or ndarray) to RGB.

    Array inputs may be uint8 / 0-255 data or floating-point data normalised
    to [0, 1]; the latter is rescaled to 0-255 before the uint8 cast, since a
    direct cast would yield an almost black image. Leading batch axes are
    dropped by taking the first item.

    Raises:
        RuntimeError: if ``obj`` is none of the supported types.
    """
    if isinstance(obj, Image.Image):
        return obj.convert("RGB")
    if isinstance(obj, ms.Tensor):
        obj = obj.asnumpy()
    if isinstance(obj, np.ndarray):
        arr = obj
        while arr.ndim > 3:
            arr = arr[0]
        # Heuristic: floating-point data entirely inside [0, 1] is treated as
        # normalised. NOTE(review): presumably what a non-PIL pipeline output
        # looks like - confirm against the pipeline's output_type.
        if (
            np.issubdtype(arr.dtype, np.floating)
            and arr.size
            and float(arr.min()) >= 0.0
            and float(arr.max()) <= 1.0
        ):
            arr = np.rint(arr * 255.0)
        arr = np.clip(arr, 0, 255).astype(np.uint8)
        return Image.fromarray(arr).convert("RGB")
    raise RuntimeError(f"Unsupported output type: {type(obj)}")


def _color_transfer_keep_structure(src_face: Image.Image, ref_style_face: Image.Image) -> Image.Image:
    """Transfer the colour statistics of the stylised face onto the original face.

    Each RGB channel of ``src_face`` is shifted and scaled so that its mean
    and standard deviation match those of ``ref_style_face``. The geometry
    and feature positions of the original face are kept; only the colour and
    brightness distribution is borrowed.
    """
    src = np.asarray(src_face.convert("RGB")).astype(np.float32)
    ref = np.asarray(ref_style_face.convert("RGB")).astype(np.float32)
    out = np.empty_like(src)
    for c in range(3):
        s = src[..., c]
        r = ref[..., c]
        # The small epsilon avoids a division by zero on flat channels.
        s_mean, s_std = float(s.mean()), float(s.std()) + 1e-6
        r_mean, r_std = float(r.mean()), float(r.std()) + 1e-6
        out[..., c] = (s - s_mean) * (r_std / s_std) + r_mean
    out = np.clip(out, 0, 255).astype(np.uint8)
    # A uint8 (H, W, 3) array is read as RGB without the deprecated `mode` argument.
    return Image.fromarray(out)


def _stylize_original_face(face_img: Image.Image, style_name: str) -> Image.Image:
    """Apply a light, filter-based stylisation to the (recoloured) original face.

    The Ghibli style gets a gentle smooth, 6-bit posterise and mild
    enhancement; every other style gets the stronger cartoon treatment
    (median and heavier smoothing, 5-bit posterise, stronger enhancement).
    """
    img = face_img.convert("RGB")
    if style_name == "吉卜力（Ghibli）":
        img = img.filter(ImageFilter.SMOOTH)
        img = ImageOps.posterize(img, 6)
        img = ImageEnhance.Color(img).enhance(1.04)
        img = ImageEnhance.Contrast(img).enhance(1.02)
        img = ImageEnhance.Sharpness(img).enhance(1.06)
    else:
        img = img.filter(ImageFilter.MedianFilter(size=3))
        img = img.filter(ImageFilter.SMOOTH_MORE)
        img = ImageOps.posterize(img, 5)
        img = ImageEnhance.Color(img).enhance(1.10)
        img = ImageEnhance.Contrast(img).enhance(1.10)
        img = ImageEnhance.Sharpness(img).enhance(1.18)
    return img


def _cartoon_postprocess(img: Image.Image, amount: float = 0.3) -> Image.Image:
    """Blend a flattened, dark-outlined cartoon version into ``img``.

    Args:
        img: image to process.
        amount: blend weight of the cartoon version; ``<= 0`` returns the
            input unchanged apart from the RGB conversion.
    """
    if amount <= 0:
        return img.convert("RGB")
    base = img.convert("RGB")
    smooth = base.filter(ImageFilter.MedianFilter(size=3)).filter(ImageFilter.SMOOTH_MORE)
    flat = ImageOps.posterize(smooth, 5)
    flat = ImageEnhance.Color(flat).enhance(1.08)
    flat = ImageEnhance.Contrast(flat).enhance(1.10)

    edge = base.convert("L").filter(ImageFilter.FIND_EDGES).filter(ImageFilter.GaussianBlur(radius=0.7))
    edge = ImageOps.autocontrast(edge)
    # Invert the edge map into a multiply layer: strong edges become dark,
    # but never darker than 36.
    edge = edge.point(lambda p: max(36, 255 - int(p * 1.55)))
    edge_rgb = Image.merge("RGB", (edge, edge, edge))
    cartoon = ImageChops.multiply(flat, edge_rgb)
    cartoon = ImageEnhance.Sharpness(cartoon).enhance(1.10)
    return Image.blend(base, cartoon, float(amount))


def _soft_line_preserve(base_face: Image.Image, orig_face: Image.Image, amount: float) -> Image.Image:
    """Darken ``base_face`` along the edges found in ``orig_face``.

    The edge map of the original face is blurred, inverted with a gentle
    slope (0.42) and multiplied onto ``base_face``; ``amount`` is the blend
    weight of the lined result. ``amount <= 0`` returns ``base_face`` as is.
    """
    if amount <= 0:
        return base_face
    edge = orig_face.convert("L").filter(ImageFilter.FIND_EDGES).filter(ImageFilter.GaussianBlur(radius=1.0))
    edge = ImageOps.autocontrast(edge)
    edge = edge.point(lambda p: int(255 - p * 0.42))
    edge_rgb = Image.merge("RGB", (edge, edge, edge))
    lined = ImageChops.multiply(base_face, edge_rgb)
    return Image.blend(base_face, lined, float(amount))


def _detail_restore(base_face: Image.Image, orig_face: Image.Image, amount: float) -> Image.Image:
    """Overlay high-frequency detail of ``orig_face`` onto ``base_face``.

    The detail layer is the unsharp-masked original minus its Gaussian blur,
    auto-contrasted and then slightly reduced in contrast. ``amount`` is the
    blend weight; ``amount <= 0`` returns ``base_face`` as is.
    """
    if amount <= 0:
        return base_face
    fine = orig_face.filter(ImageFilter.UnsharpMask(radius=1.2, percent=135, threshold=2))
    high = ImageChops.subtract(fine, fine.filter(ImageFilter.GaussianBlur(radius=1.6)))
    high = ImageOps.autocontrast(high)
    high = ImageEnhance.Contrast(high).enhance(0.82)
    detailed = ImageChops.overlay(base_face, high)
    return Image.blend(base_face, detailed, float(amount))


def _luma_match_keep_edges(src_face: Image.Image, ref_face: Image.Image) -> Image.Image:
    """Pull the luma of ``src_face`` 35% towards ``ref_face``; chroma is kept."""
    src = src_face.convert("RGB")
    ref_y = ref_face.convert("YCbCr").split()[0]
    src_ycbcr = list(src.convert("YCbCr").split())
    src_ycbcr[0] = Image.blend(src_ycbcr[0], ref_y, 0.35)
    return Image.merge("YCbCr", tuple(src_ycbcr)).convert("RGB")


def _make_identity_locked_face_patch(
    original_face: Image.Image,
    global_face: Image.Image,
    style_name: str,
    style_face_blend: float,
    line_preserve: float,
    detail_preserve: float,
    face_cartoon_boost: float = 0.0,
) -> Image.Image:
    """Build the face patch that is pasted back over the stylised image.

    No second diffusion pass is run on the face, which avoids "re-sculpting"
    it. The patch is the original face structure combined with the colour
    and luma of the stylised face and light filter-based stylisation, so the
    result stays recognisable while still carrying the style.

    Args:
        original_face: face crop of the preprocessed input photo.
        global_face: same crop taken from the stylised full image.
        style_name: preset name; selects the Ghibli or the cartoon finishing.
        style_face_blend: weight of ``global_face`` in the fused patch.
        line_preserve: amount for ``_soft_line_preserve``.
        detail_preserve: amount for ``_detail_restore``.
        face_cartoon_boost: amount for ``_cartoon_postprocess`` (non-Ghibli only).
    """
    recolored = _color_transfer_keep_structure(original_face, global_face)
    recolored = _luma_match_keep_edges(recolored, global_face)
    stylized_orig = _stylize_original_face(recolored, style_name)
    fused = Image.blend(stylized_orig, global_face, float(style_face_blend))
    fused = _detail_restore(fused, original_face, float(detail_preserve))
    fused = _soft_line_preserve(fused, original_face, float(line_preserve))
    if style_name == "吉卜力（Ghibli）":
        fused = ImageEnhance.Color(fused).enhance(1.02)
        fused = ImageEnhance.Sharpness(fused).enhance(1.10)
    else:
        fused = _cartoon_postprocess(fused, amount=float(face_cartoon_boost))
        # Second, slightly weaker line pass (never below 0.16) after the
        # cartoon filter has flattened the face.
        fused = _soft_line_preserve(fused, original_face, float(max(0.16, line_preserve - 0.04)))
        fused = ImageEnhance.Color(fused).enhance(1.05)
        fused = ImageEnhance.Contrast(fused).enhance(1.08)
        fused = ImageEnhance.Sharpness(fused).enhance(1.22)
    return fused


def _run_pipe(
    pipe: StableDiffusionImg2ImgPipeline,
    prompt: str,
    negative_prompt: str,
    image: Image.Image,
    strength: float,
    steps: int,
    guidance_scale: float,
):
    """Run one img2img pass and return the first generated image.

    Returns:
        ``out.images[0]`` when the result has a non-empty ``images``
        attribute, else the first element of a list/tuple result, else the
        raw result.
    """
    out = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=image,
        strength=float(strength),
        num_inference_steps=int(steps),
        guidance_scale=float(guidance_scale),
    )
    # Check the length rather than truthiness: `images` may be an array,
    # whose truth value is ambiguous and raises ValueError.
    images = getattr(out, "images", None)
    if images is not None and len(images) > 0:
        return images[0]
    if isinstance(out, (list, tuple)) and len(out) > 0:
        return out[0]
    return out


# -----------------------------
# 4.
生成逻辑 +# ----------------------------- +def generate( + image: Image.Image, + style_name: str, + strength: float = 0.42, + steps: int = 25, + guidance_scale: float = 7.5, + seed: int = 0, + size: int = 512, + preserve_identity: bool = True, +) -> Image.Image: + if image is None: + raise ValueError("请先上传一张图片") + + if style_name not in STYLE_PRESETS: + raise ValueError(f"不支持的风格:{style_name}") + + preset = STYLE_PRESETS[style_name] + pipe = load_pipe(str(preset["model_id"])) + base_image = _prep_image(image, size=size) + + if seed and int(seed) > 0: + ms.set_seed(int(seed)) + np.random.seed(int(seed)) + + print( + f"[ENTER] generate | device_target={ms.get_context('device_target')} | " + f"style={style_name} | preserve_identity={preserve_identity}", + flush=True, + ) + + if preserve_identity: + global_strength = min(float(strength), float(preset["identity_strength_cap"])) + else: + global_strength = float(strength) + + # 第一阶段:全图风格化 + global_img = _to_pil_image( + _run_pipe( + pipe=pipe, + prompt=str(preset["prompt"]), + negative_prompt=str(preset["negative"]), + image=base_image, + strength=global_strength, + steps=int(steps), + guidance_scale=float(guidance_scale), + ) + ) + + if style_name == "卡通插画(Cartoon)": + global_img = _cartoon_postprocess(global_img, amount=float(preset.get("global_cartoon_boost", 0.0))) + + # 第二阶段:结构锁脸融合(不再二次 diffusion 捏脸) + if preserve_identity: + face_box = _estimate_face_box(size) + original_face = base_image.crop(face_box) + global_face = global_img.crop(face_box) + face_patch = _make_identity_locked_face_patch( + original_face=original_face, + global_face=global_face, + style_name=style_name, + style_face_blend=float(preset["style_face_blend"]), + line_preserve=float(preset["line_preserve"]), + detail_preserve=float(preset.get("detail_preserve", 0.25)), + face_cartoon_boost=float(preset.get("face_cartoon_boost", 0.0)), + ) + + patch_w = face_box[2] - face_box[0] + patch_h = face_box[3] - face_box[1] + face_patch = 
face_patch.resize((patch_w, patch_h), Image.Resampling.LANCZOS) + blur_radius = max(3, size // 128) + face_mask = _make_soft_face_mask(patch_w, patch_h, blur_radius) + + fused = global_img.copy() + fused.paste(face_patch, (face_box[0], face_box[1]), mask=face_mask) + global_img = fused + + return global_img + + +# ----------------------------- +# 5. Gradio UI +# ----------------------------- +DESCRIPTION = """ +# 真人照片一键风格化(MindSpore 2.7.0 + MindNLP 0.5.1) +建议: +- `人物特征保留增强`:默认开启 +- `strength`:0.30 ~ 0.52 更像本人;数值越大,风格越强、但越容易不像本人 +- 卡通插画建议从 `0.46 ~ 0.56` 起步,吉卜力建议从 `0.34 ~ 0.44` 起步 +- `steps`:20 ~ 35 +- 半身人像建议优先用 `640` 或 `768`,脸部会更清楚 +""" + + +def _ui_generate(img, style, strength, steps, guidance, seed, size, preserve_identity): + try: + out_img = generate( + image=img, + style_name=style, + strength=float(strength), + steps=int(steps), + guidance_scale=float(guidance), + seed=int(seed), + size=int(size), + preserve_identity=bool(preserve_identity), + ) + if out_img is None: + raise RuntimeError("generate() returned None") + return out_img + except Exception as e: + traceback.print_exc() + raise gr.Error(str(e)) + + +with gr.Blocks() as demo: + gr.Markdown(DESCRIPTION) + + with gr.Row(): + inp = gr.Image(type="pil", label="上传真人照片") + out = gr.Image(type="pil", label="生成结果") + + with gr.Row(): + style = gr.Dropdown(list(STYLE_PRESETS.keys()), value=DEFAULT_STYLE, label="风格") + size = gr.Dropdown([512, 640, 768], value=640, label="输出尺寸(越大越慢)") + + with gr.Row(): + preserve_identity = gr.Checkbox(value=True, label="人物特征保留增强(推荐开启)") + strength = gr.Slider(0.20, 0.75, value=0.42, step=0.01, label="strength(风格强度)") + steps = gr.Slider(10, 50, value=25, step=1, label="steps(推理步数)") + + with gr.Row(): + guidance = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label="guidance_scale(CFG)") + seed = gr.Number(value=0, precision=0, label="seed(0=随机)") + + btn = gr.Button("生成", variant="primary") + btn.click( + _ui_generate, + inputs=[inp, style, strength, steps, guidance, seed, 
size, preserve_identity], + outputs=[out], + ) + + +if __name__ == "__main__": + demo.queue(max_size=20).launch( + server_name="0.0.0.0", + server_port=int(os.getenv("PORT", "7860")), + show_error=True, + ) From 8487cd0c78b32701a9fa4c4836d4a7fe5c832405 Mon Sep 17 00:00:00 2001 From: tt5416 <1776822920@qq.com> Date: Thu, 26 Mar 2026 19:46:54 +0800 Subject: [PATCH 2/2] Add new diffusion project --- diffusion/README.md | 2 +- diffusion/photo2style/app.ipynb | 765 ++++++++++++++++++++++++++++++++ diffusion/photo2style/app.py | 506 --------------------- 3 files changed, 766 insertions(+), 507 deletions(-) create mode 100644 diffusion/photo2style/app.ipynb delete mode 100644 diffusion/photo2style/app.py diff --git a/diffusion/README.md b/diffusion/README.md index 5de0f50..6d71540 100644 --- a/diffusion/README.md +++ b/diffusion/README.md @@ -6,7 +6,7 @@ This directory contains ready-to-use Diffusion application notebooks built with | No. | Model | Description | | :-- | :---- | :------------------------------ | -| 1 | / | This section is empty for now — feel free to contribute your first application! | +| 1 | https://github.com/tt5416/applications/tree/dev/diffusion/photo2style | Includes a notebook that converts real photos into images of a specified artistic style. | ## Contributing New Diffusion Applications diff --git a/diffusion/photo2style/app.ipynb b/diffusion/photo2style/app.ipynb new file mode 100644 index 0000000..c2a2c47 --- /dev/null +++ b/diffusion/photo2style/app.ipynb @@ -0,0 +1,765 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# 基于MindSpore和MindSpore NLP的 Photo2Style 真人照片风格化应用\n", + "\n", + "## 案例介绍\n", + "\n", + "本案例将真人照片转换为指定艺术风格图像，当前支持 **吉卜力（Ghibli）** 和 **卡通插画（Cartoon）** 两种风格。\n", + "整体方案基于 **MindSpore 2.7.0 + MindSpore NLP 0.5.1 + Diffusers** 构建，并面向 **Ascend NPU** 环境运行。\n", + "\n", + "与普通风格迁移不同，本案例重点关注两点：\n", + "\n", + "1. **风格化效果**：让输出图像具备更明显的插画或动画质感；\n", + "2. 
**人物身份保持**:尽量保留原始人物的面部比例、五官结构和发型特征,避免“风格化后不像本人”。" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "## 方案简介\n", + "\n", + "本应用的主要流程如下:\n", + "\n", + "- **环境检查**:校验 Python、MindSpore、MindSpore NLP 版本,并确认当前设备为 Ascend;\n", + "- **模型加载**:按风格加载不同的 `StableDiffusionImg2ImgPipeline`;\n", + "- **图像预处理**:对输入人像进行尺寸规范化、锐化和对比度增强;\n", + "- **全图风格化**:先对整张图进行一次 img2img 风格转换;\n", + "- **身份保持增强**:对面部区域进行局部结构锁定与细节恢复,减少“重新捏脸”问题;\n", + "- **交互界面**:通过 Gradio 提供上传图片、选择风格和调节参数的 Web UI。" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## 环境准备\n", + "\n", + "推荐运行环境如下:\n", + "\n", + "| MindSpore | MindSpore NLP | Gradio |\n", + "| :-------- | :------ | :----- |\n", + "| 2.7.0 | 0.5.1 | 最新稳定版 |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# 检查当前环境中的关键依赖版本\n", + "!pip show mindspore\n", + "!pip show mindnlp\n", + "!pip show gradio" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "如果你在昇思相关平台、ModelArts、启智社区或其他 Notebook 环境中运行本案例,\n", + "可以按需取消下方安装命令的注释,完成依赖准备。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "# 如需重新安装环境,可取消注释后执行\n", + "# !pip uninstall mindspore -y\n", + "# !pip uninstall mindnlp -y\n", + "# !pip uninstall gradio -y\n", + "#\n", + "# !pip install mindspore==2.7.0 -i https://repo.mindspore.cn/pypi/simple\n", + "# !pip install mindnlp==0.5.1\n", + "# !pip install gradio" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## 导入依赖与兼容处理\n", + "\n", + "先进行 `mindtorch` 兼容处理,再导入图像处理、MindSpore、MindSpore NLP、Diffusers 和 Gradio 等依赖。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "# -*- coding: utf-8 -*-\n", + "\n", + "# ---- Compatibility shim (MindTorch vs HF Diffusers) ----\n", + "try:\n", + " 
import mindtorch.autograd.function as _mt_func\n", + " if not hasattr(_mt_func, \"FunctionCtx\"):\n", + " class FunctionCtx:\n", + " pass\n", + " _mt_func.FunctionCtx = FunctionCtx\n", + "except Exception as _e:\n", + " print(f\"[WARN] mindtorch FunctionCtx shim skipped: {_e}\")\n", + "# ---------------------------------------------------------\n", + "\n", + "import os\n", + "import sys\n", + "import traceback\n", + "from functools import lru_cache\n", + "from typing import Dict, Tuple\n", + "\n", + "import numpy as np\n", + "from PIL import Image, ImageDraw, ImageEnhance, ImageFilter, ImageOps, ImageChops\n", + "\n", + "import mindspore as ms\n", + "import mindnlp \n", + "from diffusers import DDIMScheduler, StableDiffusionImg2ImgPipeline\n", + "import gradio as gr" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "## 版本与设备检查\n", + "\n", + "这里定义版本要求、运行精度与设备映射策略,并通过简单的矩阵乘法确认 Ascend 环境可正常执行。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "EXPECTED_MS = \"2.7.0\"\n", + "EXPECTED_MNLP = \"0.5.1\"\n", + "EXPECTED_PY_MIN = (3, 10)\n", + "EXPECTED_PY_MAX = (3, 12)\n", + "MS_DTYPE = ms.float16\n", + "\n", + "# 关键:Diffusers 的 device_map 只支持 \"cuda\"/\"balanced\"\n", + "# 在 MindNLP + Ascend 场景下,应使用 \"cuda\" 让 MindNLP 接管并映射到 NPU\n", + "DEVICE_MAP_STRATEGY = \"cuda\"\n", + "\n", + "\n", + "def _version_prefix(v: str) -> str:\n", + " return \".\".join(str(v).split(\".\")[:3])\n", + "\n", + "\n", + "def _check_versions() -> None:\n", + " device_target = ms.get_context(\"device_target\")\n", + " print(\"current device_target =\", device_target)\n", + " if device_target != \"Ascend\":\n", + " raise RuntimeError(f\"Ascend-only demo, but device_target={device_target}\")\n", + "\n", + " py_v = sys.version_info[:3]\n", + " if not (EXPECTED_PY_MIN <= py_v < EXPECTED_PY_MAX):\n", + " print(\n", + " f\"[WARN] Python version is {py_v[0]}.{py_v[1]}.{py_v[2]}, \"\n", + " 
f\"recommended range is >= {EXPECTED_PY_MIN[0]}.{EXPECTED_PY_MIN[1]} and < {EXPECTED_PY_MAX[0]}.{EXPECTED_PY_MAX[1]}.\"\n", + " )\n", + "\n", + " ms_v = _version_prefix(getattr(ms, \"__version__\", \"\"))\n", + " mnlp_v = _version_prefix(getattr(mindnlp, \"__version__\", \"\"))\n", + "\n", + " if ms_v != EXPECTED_MS:\n", + " print(f\"[WARN] MindSpore version is {ms_v}, expected {EXPECTED_MS}.\")\n", + " if mnlp_v != EXPECTED_MNLP:\n", + " print(f\"[WARN] MindNLP version is {mnlp_v}, expected {EXPECTED_MNLP}.\")\n", + "\n", + "\n", + "def _smoke_test_ascend() -> None:\n", + " a = ms.Tensor(np.random.randn(1024, 1024).astype(np.float16))\n", + " b = ms.Tensor(np.random.randn(1024, 1024).astype(np.float16))\n", + " _ = ms.ops.matmul(a, b)\n", + " print(\"[OK] Ascend smoke test done.\")\n", + "\n", + "_check_versions()\n", + "_smoke_test_ascend()" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "## 风格模板配置\n", + "\n", + "为不同风格定义对应的模型、提示词、反向提示词以及身份保持相关超参数。\n", + "这样可以在不改动主流程的情况下快速扩展新的风格模板。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "STYLE_PRESETS: Dict[str, Dict[str, object]] = {\n", + " \"吉卜力(Ghibli)\": {\n", + " \"model_id\": \"nitrosocke/Ghibli-Diffusion\",\n", + " \"prompt\": (\n", + " \"portrait of the same exact person, same identity, same facial proportions, same jawline, same hairstyle, \"\n", + " \"ghibli style, studio ghibli anime film still, hand-painted anime illustration, clean lineart, soft cel shading, \"\n", + " \"natural expression, upper body, masterpiece\"\n", + " ),\n", + " \"negative\": (\n", + " \"different person, changed face, aged face, chubby face, child face, huge anime eyes, lowres, blurry, \"\n", + " \"bad face, deformed face, disfigured, mutated, cross-eyed, extra eyes, bad anatomy, watermark, text, logo\"\n", + " ),\n", + " \"identity_strength_cap\": 0.48,\n", + " \"default_strength\": 0.38,\n", + " 
\"style_face_blend\": 0.18,\n", + " \"line_preserve\": 0.22,\n", + " \"detail_preserve\": 0.28,\n", + " },\n", + " \"卡通插画(Cartoon)\": {\n", + " \"model_id\": \"lavaman131/cartoonify\",\n", + " \"prompt\": (\n", + " \"portrait of the same exact person, same identity, same facial proportions, same jawline, same hairstyle, \"\n", + " \"disney pixar style, polished cartoon illustration, animated feature film character portrait, clean cartoon lineart, \"\n", + " \"simplified facial planes, soft cel shading, readable silhouette, stylized but recognizable face, upper body, masterpiece\"\n", + " ),\n", + " \"negative\": (\n", + " \"different person, changed face, exaggerated face, huge eyes, tiny chin, malformed mouth, over-smoothed face, waxy skin, lowres, blurry, \"\n", + " \"deformed, bad anatomy, watermark, text, logo\"\n", + " ),\n", + " \"identity_strength_cap\": 0.56,\n", + " \"default_strength\": 0.50,\n", + " \"style_face_blend\": 0.32,\n", + " \"line_preserve\": 0.24,\n", + " \"detail_preserve\": 0.28,\n", + " \"global_cartoon_boost\": 0.34,\n", + " \"face_cartoon_boost\": 0.26,\n", + " },\n", + "}\n", + "\n", + "DEFAULT_STYLE = \"吉卜力(Ghibli)\" " + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## Pipeline 加载与缓存\n", + "\n", + "由于不同风格会对应不同模型,使用 `lru_cache` 对 pipeline 进行缓存,避免重复加载带来的额外时间和显存开销。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "@lru_cache(maxsize=2)\n", + "def load_pipe(model_id: str) -> StableDiffusionImg2ImgPipeline:\n", + " pipe = StableDiffusionImg2ImgPipeline.from_pretrained(\n", + " model_id,\n", + " ms_dtype=MS_DTYPE,\n", + " device_map=DEVICE_MAP_STRATEGY,\n", + " )\n", + " pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)\n", + "\n", + " try:\n", + " pipe.enable_attention_slicing()\n", + " except Exception:\n", + " pass\n", + " try:\n", + " pipe.set_progress_bar_config(disable=True)\n", + " except 
Exception:\n", + " pass\n", + " try:\n", + " pipe.safety_checker = None\n", + " pipe.requires_safety_checker = False\n", + " except Exception:\n", + " pass\n", + "\n", + " print(f\"[OK] pipeline loaded: {model_id}\", flush=True)\n", + " return pipe" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## 图像预处理与身份保持增强\n", + "\n", + "这部分代码用于:\n", + "\n", + "- 统一图像尺寸与构图;\n", + "- 估计面部区域;\n", + "- 构造柔和的人脸掩膜;\n", + "- 完成人脸颜色迁移、明暗匹配、边缘保留与细节恢复;\n", + "- 在卡通风格下进一步加强线条与块面感。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "def _prep_image(img: Image.Image, size: int = 512) -> Image.Image:\n", + " img = ImageOps.exif_transpose(img).convert(\"RGB\")\n", + " img = ImageEnhance.Sharpness(img).enhance(1.15)\n", + " img = ImageEnhance.Contrast(img).enhance(1.04)\n", + " return ImageOps.fit(\n", + " img,\n", + " (size, size),\n", + " method=Image.Resampling.LANCZOS,\n", + " centering=(0.5, 0.28), # 往上偏一点,更照顾脸部区域\n", + " )\n", + "\n", + "\n", + "def _estimate_face_box(size: int) -> Tuple[int, int, int, int]:\n", + " \"\"\"\n", + " 更紧的人脸经验框:主要覆盖额头、眼鼻口和少量下巴,\n", + " 尽量少吃到西装、肩部和背景,避免脸部融合区被过度软化。\n", + " \"\"\"\n", + " x0 = int(size * 0.31)\n", + " y0 = int(size * 0.11)\n", + " x1 = int(size * 0.69)\n", + " y1 = int(size * 0.50)\n", + " return x0, y0, x1, y1\n", + "\n", + "\n", + "def _make_soft_face_mask(width: int, height: int, blur_radius: int) -> Image.Image:\n", + " mask = Image.new(\"L\", (width, height), 0)\n", + " draw = ImageDraw.Draw(mask)\n", + " pad_w = int(width * 0.08)\n", + " pad_h = int(height * 0.08)\n", + " inner = (pad_w, pad_h, width - pad_w, height - pad_h)\n", + " draw.rounded_rectangle(inner, radius=max(8, min(width, height) // 9), fill=220)\n", + " core_pad_w = int(width * 0.16)\n", + " core_pad_h = int(height * 0.15)\n", + " core = (core_pad_w, core_pad_h, width - core_pad_w, height - core_pad_h)\n", + " draw.ellipse(core, fill=255)\n", + " return 
mask.filter(ImageFilter.GaussianBlur(radius=blur_radius))\n", + "\n", + "\n", + "def _to_pil_image(obj) -> Image.Image:\n", + " if isinstance(obj, Image.Image):\n", + " return obj.convert(\"RGB\")\n", + " if isinstance(obj, ms.Tensor):\n", + " arr = obj.asnumpy()\n", + " arr = np.clip(arr, 0, 255).astype(np.uint8)\n", + " return Image.fromarray(arr).convert(\"RGB\")\n", + " if isinstance(obj, np.ndarray):\n", + " arr = np.clip(obj, 0, 255).astype(np.uint8)\n", + " return Image.fromarray(arr).convert(\"RGB\")\n", + " raise RuntimeError(f\"Unsupported output type: {type(obj)}\")\n", + "\n", + "\n", + "def _color_transfer_keep_structure(src_face: Image.Image, ref_style_face: Image.Image) -> Image.Image:\n", + " \"\"\"\n", + " 用风格图的颜色统计迁移到原脸上:\n", + " 保留原脸的几何结构/五官位置,只借用风格脸的颜色与明暗分布。\n", + " \"\"\"\n", + " src = np.asarray(src_face.convert(\"RGB\")).astype(np.float32)\n", + " ref = np.asarray(ref_style_face.convert(\"RGB\")).astype(np.float32)\n", + " out = np.empty_like(src)\n", + " for c in range(3):\n", + " s = src[..., c]\n", + " r = ref[..., c]\n", + " s_mean, s_std = float(s.mean()), float(s.std()) + 1e-6\n", + " r_mean, r_std = float(r.mean()), float(r.std()) + 1e-6\n", + " out[..., c] = (s - s_mean) * (r_std / s_std) + r_mean\n", + " out = np.clip(out, 0, 255).astype(np.uint8)\n", + " return Image.fromarray(out, mode=\"RGB\")\n", + "\n", + "\n", + "def _stylize_original_face(face_img: Image.Image, style_name: str) -> Image.Image:\n", + " img = face_img.convert(\"RGB\")\n", + " if style_name == \"吉卜力(Ghibli)\":\n", + " img = img.filter(ImageFilter.SMOOTH)\n", + " img = ImageOps.posterize(img, 6)\n", + " img = ImageEnhance.Color(img).enhance(1.04)\n", + " img = ImageEnhance.Contrast(img).enhance(1.02)\n", + " img = ImageEnhance.Sharpness(img).enhance(1.06)\n", + " else:\n", + " img = img.filter(ImageFilter.MedianFilter(size=3))\n", + " img = img.filter(ImageFilter.SMOOTH_MORE)\n", + " img = ImageOps.posterize(img, 5)\n", + " img = 
ImageEnhance.Color(img).enhance(1.10)\n", + " img = ImageEnhance.Contrast(img).enhance(1.10)\n", + " img = ImageEnhance.Sharpness(img).enhance(1.18)\n", + " return img\n", + "\n", + "\n", + "def _cartoon_postprocess(img: Image.Image, amount: float = 0.3) -> Image.Image:\n", + " if amount <= 0:\n", + " return img.convert(\"RGB\")\n", + " base = img.convert(\"RGB\")\n", + " smooth = base.filter(ImageFilter.MedianFilter(size=3)).filter(ImageFilter.SMOOTH_MORE)\n", + " flat = ImageOps.posterize(smooth, 5)\n", + " flat = ImageEnhance.Color(flat).enhance(1.08)\n", + " flat = ImageEnhance.Contrast(flat).enhance(1.10)\n", + "\n", + " edge = base.convert(\"L\").filter(ImageFilter.FIND_EDGES).filter(ImageFilter.GaussianBlur(radius=0.7))\n", + " edge = ImageOps.autocontrast(edge)\n", + " edge = edge.point(lambda p: max(36, 255 - int(p * 1.55)))\n", + " edge_rgb = Image.merge(\"RGB\", (edge, edge, edge))\n", + " cartoon = ImageChops.multiply(flat, edge_rgb)\n", + " cartoon = ImageEnhance.Sharpness(cartoon).enhance(1.10)\n", + " return Image.blend(base, cartoon, float(amount))\n", + "\n", + "\n", + "def _soft_line_preserve(base_face: Image.Image, orig_face: Image.Image, amount: float) -> Image.Image:\n", + " if amount <= 0:\n", + " return base_face\n", + " edge = orig_face.convert(\"L\").filter(ImageFilter.FIND_EDGES).filter(ImageFilter.GaussianBlur(radius=1.0))\n", + " edge = ImageOps.autocontrast(edge)\n", + " edge = edge.point(lambda p: int(255 - p * 0.42))\n", + " edge_rgb = Image.merge(\"RGB\", (edge, edge, edge))\n", + " lined = ImageChops.multiply(base_face, edge_rgb)\n", + " return Image.blend(base_face, lined, amount)\n", + "\n", + "\n", + "def _detail_restore(base_face: Image.Image, orig_face: Image.Image, amount: float) -> Image.Image:\n", + " if amount <= 0:\n", + " return base_face\n", + " fine = orig_face.filter(ImageFilter.UnsharpMask(radius=1.2, percent=135, threshold=2))\n", + " high = ImageChops.subtract(fine, 
fine.filter(ImageFilter.GaussianBlur(radius=1.6)))\n", + " high = ImageOps.autocontrast(high)\n", + " high = ImageEnhance.Contrast(high).enhance(0.82)\n", + " detailed = ImageChops.overlay(base_face, high)\n", + " return Image.blend(base_face, detailed, float(amount))\n", + "\n", + "\n", + "def _luma_match_keep_edges(src_face: Image.Image, ref_face: Image.Image) -> Image.Image:\n", + " src = src_face.convert(\"RGB\")\n", + " ref_y = ref_face.convert(\"YCbCr\").split()[0]\n", + " src_ycbcr = list(src.convert(\"YCbCr\").split())\n", + " src_ycbcr[0] = Image.blend(src_ycbcr[0], ref_y, 0.35)\n", + " return Image.merge(\"YCbCr\", tuple(src_ycbcr)).convert(\"RGB\")\n", + "\n", + "\n", + "def _make_identity_locked_face_patch(\n", + " original_face: Image.Image,\n", + " global_face: Image.Image,\n", + " style_name: str,\n", + " style_face_blend: float,\n", + " line_preserve: float,\n", + " detail_preserve: float,\n", + " face_cartoon_boost: float = 0.0,\n", + ") -> Image.Image:\n", + " \"\"\"\n", + " 不再对脸做第二次 diffusion 生成,避免“重新捏脸”。\n", + " 直接用原脸结构 + 风格脸颜色/明暗 + 轻量卡通化滤波,\n", + " 这样能同时满足“更像本人”与“脸部仍然有风格感”。\n", + " \"\"\"\n", + " recolored = _color_transfer_keep_structure(original_face, global_face)\n", + " recolored = _luma_match_keep_edges(recolored, global_face)\n", + " stylized_orig = _stylize_original_face(recolored, style_name)\n", + " fused = Image.blend(stylized_orig, global_face, float(style_face_blend))\n", + " fused = _detail_restore(fused, original_face, float(detail_preserve))\n", + " fused = _soft_line_preserve(fused, original_face, float(line_preserve))\n", + " if style_name == \"吉卜力(Ghibli)\":\n", + " fused = ImageEnhance.Color(fused).enhance(1.02)\n", + " fused = ImageEnhance.Sharpness(fused).enhance(1.10)\n", + " else:\n", + " fused = _cartoon_postprocess(fused, amount=float(face_cartoon_boost))\n", + " fused = _soft_line_preserve(fused, original_face, float(max(0.16, line_preserve - 0.04)))\n", + " fused = ImageEnhance.Color(fused).enhance(1.05)\n", + " fused 
= ImageEnhance.Contrast(fused).enhance(1.08)\n", + " fused = ImageEnhance.Sharpness(fused).enhance(1.22)\n", + " return fused" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "## 图像生成主流程\n", + "\n", + "主流程 `generate()` 包含两阶段:\n", + "\n", + "1. **全图风格化**:使用 img2img pipeline 完成整体风格变换;\n", + "2. **结构锁脸融合**:基于原始面部结构进行局部增强,减少人物特征漂移。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "def _run_pipe(\n", + " pipe: StableDiffusionImg2ImgPipeline,\n", + " prompt: str,\n", + " negative_prompt: str,\n", + " image: Image.Image,\n", + " strength: float,\n", + " steps: int,\n", + " guidance_scale: float,\n", + "):\n", + " out = pipe(\n", + " prompt=prompt,\n", + " negative_prompt=negative_prompt,\n", + " image=image,\n", + " strength=float(strength),\n", + " num_inference_steps=int(steps),\n", + " guidance_scale=float(guidance_scale),\n", + " )\n", + " if hasattr(out, \"images\") and out.images:\n", + " return out.images[0]\n", + " if isinstance(out, (list, tuple)) and len(out) > 0:\n", + " return out[0]\n", + " return out\n", + "\n", + "\n", + "def generate(\n", + " image: Image.Image,\n", + " style_name: str,\n", + " strength: float = 0.42,\n", + " steps: int = 25,\n", + " guidance_scale: float = 7.5,\n", + " seed: int = 0,\n", + " size: int = 512,\n", + " preserve_identity: bool = True,\n", + ") -> Image.Image:\n", + " if image is None:\n", + " raise ValueError(\"请先上传一张图片\")\n", + "\n", + " if style_name not in STYLE_PRESETS:\n", + " raise ValueError(f\"不支持的风格:{style_name}\")\n", + "\n", + " preset = STYLE_PRESETS[style_name]\n", + " pipe = load_pipe(str(preset[\"model_id\"]))\n", + " base_image = _prep_image(image, size=size)\n", + "\n", + " if seed and int(seed) > 0:\n", + " ms.set_seed(int(seed))\n", + " np.random.seed(int(seed))\n", + "\n", + " print(\n", + " f\"[ENTER] generate | device_target={ms.get_context('device_target')} | \"\n", + " 
f\"style={style_name} | preserve_identity={preserve_identity}\",\n", + " flush=True,\n", + " )\n", + "\n", + " if preserve_identity:\n", + " global_strength = min(float(strength), float(preset[\"identity_strength_cap\"]))\n", + " else:\n", + " global_strength = float(strength)\n", + "\n", + " # 第一阶段:全图风格化\n", + " global_img = _to_pil_image(\n", + " _run_pipe(\n", + " pipe=pipe,\n", + " prompt=str(preset[\"prompt\"]),\n", + " negative_prompt=str(preset[\"negative\"]),\n", + " image=base_image,\n", + " strength=global_strength,\n", + " steps=int(steps),\n", + " guidance_scale=float(guidance_scale),\n", + " )\n", + " )\n", + "\n", + " if style_name == \"卡通插画(Cartoon)\":\n", + " global_img = _cartoon_postprocess(global_img, amount=float(preset.get(\"global_cartoon_boost\", 0.0)))\n", + "\n", + " # 第二阶段:结构锁脸融合(不再二次 diffusion 捏脸)\n", + " if preserve_identity:\n", + " face_box = _estimate_face_box(size)\n", + " original_face = base_image.crop(face_box)\n", + " global_face = global_img.crop(face_box)\n", + " face_patch = _make_identity_locked_face_patch(\n", + " original_face=original_face,\n", + " global_face=global_face,\n", + " style_name=style_name,\n", + " style_face_blend=float(preset[\"style_face_blend\"]),\n", + " line_preserve=float(preset[\"line_preserve\"]),\n", + " detail_preserve=float(preset.get(\"detail_preserve\", 0.25)),\n", + " face_cartoon_boost=float(preset.get(\"face_cartoon_boost\", 0.0)),\n", + " )\n", + "\n", + " patch_w = face_box[2] - face_box[0]\n", + " patch_h = face_box[3] - face_box[1]\n", + " face_patch = face_patch.resize((patch_w, patch_h), Image.Resampling.LANCZOS)\n", + " blur_radius = max(3, size // 128)\n", + " face_mask = _make_soft_face_mask(patch_w, patch_h, blur_radius)\n", + "\n", + " fused = global_img.copy()\n", + " fused.paste(face_patch, (face_box[0], face_box[1]), mask=face_mask)\n", + " global_img = fused\n", + "\n", + " return global_img" + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": {}, + "source": [ + 
"## 交互说明与参数建议\n", + "\n", + "Notebook 版本同样保留 Gradio 交互界面。 \n", + "建议参数如下:\n", + "\n", + "- **人物特征保留增强**:默认开启;\n", + "- **strength**:`0.30 ~ 0.52` 更接近本人,值越大风格越强;\n", + "- **steps**:建议 `20 ~ 35`;\n", + "- **size**:半身人像优先使用 `640` 或 `768`。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "DESCRIPTION = \"\"\"\n", + "# 真人照片一键风格化(MindSpore 2.7.0 + MindNLP 0.5.1)\n", + "建议:\n", + "- `人物特征保留增强`:默认开启\n", + "- `strength`:0.30 ~ 0.52 更像本人;数值越大,风格越强、但越容易不像本人\n", + "- 卡通插画建议从 `0.46 ~ 0.56` 起步,吉卜力建议从 `0.34 ~ 0.44` 起步\n", + "- `steps`:20 ~ 35\n", + "- 半身人像建议优先用 `640` 或 `768`,脸部会更清楚\n", + "\"\"\"\n", + "\n", + "\n", + "def _ui_generate(img, style, strength, steps, guidance, seed, size, preserve_identity):\n", + " try:\n", + " out_img = generate(\n", + " image=img,\n", + " style_name=style,\n", + " strength=float(strength),\n", + " steps=int(steps),\n", + " guidance_scale=float(guidance),\n", + " seed=int(seed),\n", + " size=int(size),\n", + " preserve_identity=bool(preserve_identity),\n", + " )\n", + " if out_img is None:\n", + " raise RuntimeError(\"generate() returned None\")\n", + " return out_img\n", + " except Exception as e:\n", + " traceback.print_exc()\n", + " raise gr.Error(str(e))" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": {}, + "source": [ + "## Gradio 界面定义\n", + "\n", + "这里构建可视化界面,包括图片上传、风格选择、尺寸设置和推理参数控制。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks() as demo:\n", + " gr.Markdown(DESCRIPTION)\n", + "\n", + " with gr.Row():\n", + " inp = gr.Image(type=\"pil\", label=\"上传真人照片\")\n", + " out = gr.Image(type=\"pil\", label=\"生成结果\")\n", + "\n", + " with gr.Row():\n", + " style = gr.Dropdown(list(STYLE_PRESETS.keys()), value=DEFAULT_STYLE, label=\"风格\")\n", + " size = gr.Dropdown([512, 640, 768], value=640, label=\"输出尺寸(越大越慢)\")\n", + "\n", + " with gr.Row():\n", + " 
preserve_identity = gr.Checkbox(value=True, label=\"人物特征保留增强(推荐开启)\")\n", + " strength = gr.Slider(0.20, 0.75, value=0.42, step=0.01, label=\"strength(风格强度)\")\n", + " steps = gr.Slider(10, 50, value=25, step=1, label=\"steps(推理步数)\")\n", + "\n", + " with gr.Row():\n", + " guidance = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label=\"guidance_scale(CFG)\")\n", + " seed = gr.Number(value=0, precision=0, label=\"seed(0=随机)\")\n", + "\n", + " btn = gr.Button(\"生成\", variant=\"primary\")\n", + " btn.click(\n", + " _ui_generate,\n", + " inputs=[inp, style, strength, steps, guidance, seed, size, preserve_identity],\n", + " outputs=[out],\n", + " )\n", + "\n", + "demo" + ] + }, + { + "cell_type": "markdown", + "id": "22", + "metadata": {}, + "source": [ + "## 启动应用\n", + "\n", + "执行下方单元即可启动 Gradio 服务。 \n", + "如需调整端口,可修改环境变量 `PORT` 或直接改写 `server_port`。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "demo.queue(max_size=20).launch(\n", + " server_name=\"0.0.0.0\",\n", + " server_port=int(os.getenv(\"PORT\", \"7861\")),\n", + " show_error=True,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "work", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/diffusion/photo2style/app.py b/diffusion/photo2style/app.py deleted file mode 100644 index 883a390..0000000 --- a/diffusion/photo2style/app.py +++ /dev/null @@ -1,506 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Photo2Style Demo (MindSpore 2.7.0 + MindNLP 0.5.1) -""" - -# ---- Compatibility shim (MindTorch vs HF Diffusers) ---- -try: - import mindtorch.autograd.function as _mt_func - if not hasattr(_mt_func, "FunctionCtx"): 
- class FunctionCtx: - pass - _mt_func.FunctionCtx = FunctionCtx -except Exception as _e: - print(f"[WARN] mindtorch FunctionCtx shim skipped: {_e}") -# --------------------------------------------------------- - -import os -import sys -import traceback -from functools import lru_cache -from typing import Dict, Tuple - -import numpy as np -from PIL import Image, ImageDraw, ImageEnhance, ImageFilter, ImageOps, ImageChops - -import mindspore as ms -import mindnlp # IMPORTANT: import mindnlp BEFORE diffusers (it patches HF stack) -from diffusers import DDIMScheduler, StableDiffusionImg2ImgPipeline -import gradio as gr - - -# ----------------------------- -# 0. 版本与设备(Ascend-only) -# ----------------------------- -EXPECTED_MS = "2.7.0" -EXPECTED_MNLP = "0.5.1" -EXPECTED_PY_MIN = (3, 10) -EXPECTED_PY_MAX = (3, 12) -MS_DTYPE = ms.float16 - -# 关键:Diffusers 的 device_map 只支持 "cuda"/"balanced" -# 在 MindNLP + Ascend 场景下,应使用 "cuda" 让 MindNLP 接管并映射到 NPU -DEVICE_MAP_STRATEGY = "cuda" - - -def _version_prefix(v: str) -> str: - return ".".join(str(v).split(".")[:3]) - - -def _set_context() -> None: - ms.set_context(mode=ms.PYNATIVE_MODE) - ms.set_device("Ascend", int(os.getenv("DEVICE_ID", "0"))) - - -def _check_versions() -> None: - device_target = ms.get_context("device_target") - if device_target != "Ascend": - raise RuntimeError(f"Ascend-only demo, but device_target={device_target}") - - py_v = sys.version_info[:3] - if not (EXPECTED_PY_MIN <= py_v < EXPECTED_PY_MAX): - print( - f"[WARN] Python version is {py_v[0]}.{py_v[1]}.{py_v[2]}, " - f"recommended range is >= {EXPECTED_PY_MIN[0]}.{EXPECTED_PY_MIN[1]} and < {EXPECTED_PY_MAX[0]}.{EXPECTED_PY_MAX[1]}." 
- ) - - ms_v = _version_prefix(getattr(ms, "__version__", "")) - mnlp_v = _version_prefix(getattr(mindnlp, "__version__", "")) - - if ms_v != EXPECTED_MS: - print(f"[WARN] MindSpore version is {ms_v}, expected {EXPECTED_MS}.") - if mnlp_v != EXPECTED_MNLP: - print(f"[WARN] MindNLP version is {mnlp_v}, expected {EXPECTED_MNLP}.") - - -def _smoke_test_ascend() -> None: - a = ms.Tensor(np.random.randn(1024, 1024).astype(np.float16)) - b = ms.Tensor(np.random.randn(1024, 1024).astype(np.float16)) - _ = ms.ops.matmul(a, b) - print("[OK] Ascend smoke test done.") - - -_set_context() -_check_versions() -_smoke_test_ascend() - - -# ----------------------------- -# 1. 风格模板(只保留更可交付的风格) -# ----------------------------- -STYLE_PRESETS: Dict[str, Dict[str, object]] = { - "吉卜力(Ghibli)": { - "model_id": "nitrosocke/Ghibli-Diffusion", - "prompt": ( - "portrait of the same exact person, same identity, same facial proportions, same jawline, same hairstyle, " - "ghibli style, studio ghibli anime film still, hand-painted anime illustration, clean lineart, soft cel shading, " - "natural expression, upper body, masterpiece" - ), - "negative": ( - "different person, changed face, aged face, chubby face, child face, huge anime eyes, lowres, blurry, " - "bad face, deformed face, disfigured, mutated, cross-eyed, extra eyes, bad anatomy, watermark, text, logo" - ), - "identity_strength_cap": 0.48, - "default_strength": 0.38, - "style_face_blend": 0.18, - "line_preserve": 0.22, - "detail_preserve": 0.28, - }, - "卡通插画(Cartoon)": { - "model_id": "lavaman131/cartoonify", - "prompt": ( - "portrait of the same exact person, same identity, same facial proportions, same jawline, same hairstyle, " - "disney pixar style, polished cartoon illustration, animated feature film character portrait, clean cartoon lineart, " - "simplified facial planes, soft cel shading, readable silhouette, stylized but recognizable face, upper body, masterpiece" - ), - "negative": ( - "different person, changed face, 
exaggerated face, huge eyes, tiny chin, malformed mouth, over-smoothed face, waxy skin, lowres, blurry, " - "deformed, bad anatomy, watermark, text, logo" - ), - "identity_strength_cap": 0.56, - "default_strength": 0.50, - "style_face_blend": 0.32, - "line_preserve": 0.24, - "detail_preserve": 0.28, - "global_cartoon_boost": 0.34, - "face_cartoon_boost": 0.26, - }, -} -DEFAULT_STYLE = "吉卜力(Ghibli)" - - -# ----------------------------- -# 2. Pipeline 缓存(按模型 id 复用) -# ----------------------------- -@lru_cache(maxsize=2) -def load_pipe(model_id: str) -> StableDiffusionImg2ImgPipeline: - pipe = StableDiffusionImg2ImgPipeline.from_pretrained( - model_id, - ms_dtype=MS_DTYPE, - device_map=DEVICE_MAP_STRATEGY, - ) - pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) - - try: - pipe.enable_attention_slicing() - except Exception: - pass - try: - pipe.set_progress_bar_config(disable=True) - except Exception: - pass - try: - pipe.safety_checker = None - pipe.requires_safety_checker = False - except Exception: - pass - - print(f"[OK] pipeline loaded: {model_id}", flush=True) - return pipe - - -# ----------------------------- -# 3. 
图像预处理与身份增强 -# ----------------------------- -def _prep_image(img: Image.Image, size: int = 512) -> Image.Image: - img = ImageOps.exif_transpose(img).convert("RGB") - img = ImageEnhance.Sharpness(img).enhance(1.15) - img = ImageEnhance.Contrast(img).enhance(1.04) - return ImageOps.fit( - img, - (size, size), - method=Image.Resampling.LANCZOS, - centering=(0.5, 0.28), # 往上偏一点,更照顾脸部区域 - ) - - -def _estimate_face_box(size: int) -> Tuple[int, int, int, int]: - """ - 更紧的人脸经验框:主要覆盖额头、眼鼻口和少量下巴, - 尽量少吃到西装、肩部和背景,避免脸部融合区被过度软化。 - """ - x0 = int(size * 0.31) - y0 = int(size * 0.11) - x1 = int(size * 0.69) - y1 = int(size * 0.50) - return x0, y0, x1, y1 - - -def _make_soft_face_mask(width: int, height: int, blur_radius: int) -> Image.Image: - mask = Image.new("L", (width, height), 0) - draw = ImageDraw.Draw(mask) - pad_w = int(width * 0.08) - pad_h = int(height * 0.08) - inner = (pad_w, pad_h, width - pad_w, height - pad_h) - draw.rounded_rectangle(inner, radius=max(8, min(width, height) // 9), fill=220) - core_pad_w = int(width * 0.16) - core_pad_h = int(height * 0.15) - core = (core_pad_w, core_pad_h, width - core_pad_w, height - core_pad_h) - draw.ellipse(core, fill=255) - return mask.filter(ImageFilter.GaussianBlur(radius=blur_radius)) - - -def _to_pil_image(obj) -> Image.Image: - if isinstance(obj, Image.Image): - return obj.convert("RGB") - if isinstance(obj, ms.Tensor): - arr = obj.asnumpy() - arr = np.clip(arr, 0, 255).astype(np.uint8) - return Image.fromarray(arr).convert("RGB") - if isinstance(obj, np.ndarray): - arr = np.clip(obj, 0, 255).astype(np.uint8) - return Image.fromarray(arr).convert("RGB") - raise RuntimeError(f"Unsupported output type: {type(obj)}") - - -def _color_transfer_keep_structure(src_face: Image.Image, ref_style_face: Image.Image) -> Image.Image: - """ - 用风格图的颜色统计迁移到原脸上: - 保留原脸的几何结构/五官位置,只借用风格脸的颜色与明暗分布。 - """ - src = np.asarray(src_face.convert("RGB")).astype(np.float32) - ref = np.asarray(ref_style_face.convert("RGB")).astype(np.float32) - out = 
np.empty_like(src) - for c in range(3): - s = src[..., c] - r = ref[..., c] - s_mean, s_std = float(s.mean()), float(s.std()) + 1e-6 - r_mean, r_std = float(r.mean()), float(r.std()) + 1e-6 - out[..., c] = (s - s_mean) * (r_std / s_std) + r_mean - out = np.clip(out, 0, 255).astype(np.uint8) - return Image.fromarray(out, mode="RGB") - - -def _stylize_original_face(face_img: Image.Image, style_name: str) -> Image.Image: - img = face_img.convert("RGB") - if style_name == "吉卜力(Ghibli)": - img = img.filter(ImageFilter.SMOOTH) - img = ImageOps.posterize(img, 6) - img = ImageEnhance.Color(img).enhance(1.04) - img = ImageEnhance.Contrast(img).enhance(1.02) - img = ImageEnhance.Sharpness(img).enhance(1.06) - else: - img = img.filter(ImageFilter.MedianFilter(size=3)) - img = img.filter(ImageFilter.SMOOTH_MORE) - img = ImageOps.posterize(img, 5) - img = ImageEnhance.Color(img).enhance(1.10) - img = ImageEnhance.Contrast(img).enhance(1.10) - img = ImageEnhance.Sharpness(img).enhance(1.18) - return img - - -def _cartoon_postprocess(img: Image.Image, amount: float = 0.3) -> Image.Image: - if amount <= 0: - return img.convert("RGB") - base = img.convert("RGB") - smooth = base.filter(ImageFilter.MedianFilter(size=3)).filter(ImageFilter.SMOOTH_MORE) - flat = ImageOps.posterize(smooth, 5) - flat = ImageEnhance.Color(flat).enhance(1.08) - flat = ImageEnhance.Contrast(flat).enhance(1.10) - - edge = base.convert("L").filter(ImageFilter.FIND_EDGES).filter(ImageFilter.GaussianBlur(radius=0.7)) - edge = ImageOps.autocontrast(edge) - edge = edge.point(lambda p: max(36, 255 - int(p * 1.55))) - edge_rgb = Image.merge("RGB", (edge, edge, edge)) - cartoon = ImageChops.multiply(flat, edge_rgb) - cartoon = ImageEnhance.Sharpness(cartoon).enhance(1.10) - return Image.blend(base, cartoon, float(amount)) - - -def _soft_line_preserve(base_face: Image.Image, orig_face: Image.Image, amount: float) -> Image.Image: - if amount <= 0: - return base_face - edge = 
orig_face.convert("L").filter(ImageFilter.FIND_EDGES).filter(ImageFilter.GaussianBlur(radius=1.0)) - edge = ImageOps.autocontrast(edge) - edge = edge.point(lambda p: int(255 - p * 0.42)) - edge_rgb = Image.merge("RGB", (edge, edge, edge)) - lined = ImageChops.multiply(base_face, edge_rgb) - return Image.blend(base_face, lined, amount) - - -def _detail_restore(base_face: Image.Image, orig_face: Image.Image, amount: float) -> Image.Image: - if amount <= 0: - return base_face - fine = orig_face.filter(ImageFilter.UnsharpMask(radius=1.2, percent=135, threshold=2)) - high = ImageChops.subtract(fine, fine.filter(ImageFilter.GaussianBlur(radius=1.6))) - high = ImageOps.autocontrast(high) - high = ImageEnhance.Contrast(high).enhance(0.82) - detailed = ImageChops.overlay(base_face, high) - return Image.blend(base_face, detailed, float(amount)) - - -def _luma_match_keep_edges(src_face: Image.Image, ref_face: Image.Image) -> Image.Image: - src = src_face.convert("RGB") - ref_y = ref_face.convert("YCbCr").split()[0] - src_ycbcr = list(src.convert("YCbCr").split()) - src_ycbcr[0] = Image.blend(src_ycbcr[0], ref_y, 0.35) - return Image.merge("YCbCr", tuple(src_ycbcr)).convert("RGB") - - -def _make_identity_locked_face_patch( - original_face: Image.Image, - global_face: Image.Image, - style_name: str, - style_face_blend: float, - line_preserve: float, - detail_preserve: float, - face_cartoon_boost: float = 0.0, -) -> Image.Image: - """ - 不再对脸做第二次 diffusion 生成,避免“重新捏脸”。 - 直接用原脸结构 + 风格脸颜色/明暗 + 轻量卡通化滤波, - 这样能同时满足“更像本人”与“脸部仍然有风格感”。 - """ - recolored = _color_transfer_keep_structure(original_face, global_face) - recolored = _luma_match_keep_edges(recolored, global_face) - stylized_orig = _stylize_original_face(recolored, style_name) - fused = Image.blend(stylized_orig, global_face, float(style_face_blend)) - fused = _detail_restore(fused, original_face, float(detail_preserve)) - fused = _soft_line_preserve(fused, original_face, float(line_preserve)) - if style_name == "吉卜力(Ghibli)": - 
fused = ImageEnhance.Color(fused).enhance(1.02) - fused = ImageEnhance.Sharpness(fused).enhance(1.10) - else: - fused = _cartoon_postprocess(fused, amount=float(face_cartoon_boost)) - fused = _soft_line_preserve(fused, original_face, float(max(0.16, line_preserve - 0.04))) - fused = ImageEnhance.Color(fused).enhance(1.05) - fused = ImageEnhance.Contrast(fused).enhance(1.08) - fused = ImageEnhance.Sharpness(fused).enhance(1.22) - return fused - - -def _run_pipe( - - pipe: StableDiffusionImg2ImgPipeline, - prompt: str, - negative_prompt: str, - image: Image.Image, - strength: float, - steps: int, - guidance_scale: float, -): - out = pipe( - prompt=prompt, - negative_prompt=negative_prompt, - image=image, - strength=float(strength), - num_inference_steps=int(steps), - guidance_scale=float(guidance_scale), - ) - if hasattr(out, "images") and out.images: - return out.images[0] - if isinstance(out, (list, tuple)) and len(out) > 0: - return out[0] - return out - - -# ----------------------------- -# 4. 
生成逻辑 -# ----------------------------- -def generate( - image: Image.Image, - style_name: str, - strength: float = 0.42, - steps: int = 25, - guidance_scale: float = 7.5, - seed: int = 0, - size: int = 512, - preserve_identity: bool = True, -) -> Image.Image: - if image is None: - raise ValueError("请先上传一张图片") - - if style_name not in STYLE_PRESETS: - raise ValueError(f"不支持的风格:{style_name}") - - preset = STYLE_PRESETS[style_name] - pipe = load_pipe(str(preset["model_id"])) - base_image = _prep_image(image, size=size) - - if seed and int(seed) > 0: - ms.set_seed(int(seed)) - np.random.seed(int(seed)) - - print( - f"[ENTER] generate | device_target={ms.get_context('device_target')} | " - f"style={style_name} | preserve_identity={preserve_identity}", - flush=True, - ) - - if preserve_identity: - global_strength = min(float(strength), float(preset["identity_strength_cap"])) - else: - global_strength = float(strength) - - # 第一阶段:全图风格化 - global_img = _to_pil_image( - _run_pipe( - pipe=pipe, - prompt=str(preset["prompt"]), - negative_prompt=str(preset["negative"]), - image=base_image, - strength=global_strength, - steps=int(steps), - guidance_scale=float(guidance_scale), - ) - ) - - if style_name == "卡通插画(Cartoon)": - global_img = _cartoon_postprocess(global_img, amount=float(preset.get("global_cartoon_boost", 0.0))) - - # 第二阶段:结构锁脸融合(不再二次 diffusion 捏脸) - if preserve_identity: - face_box = _estimate_face_box(size) - original_face = base_image.crop(face_box) - global_face = global_img.crop(face_box) - face_patch = _make_identity_locked_face_patch( - original_face=original_face, - global_face=global_face, - style_name=style_name, - style_face_blend=float(preset["style_face_blend"]), - line_preserve=float(preset["line_preserve"]), - detail_preserve=float(preset.get("detail_preserve", 0.25)), - face_cartoon_boost=float(preset.get("face_cartoon_boost", 0.0)), - ) - - patch_w = face_box[2] - face_box[0] - patch_h = face_box[3] - face_box[1] - face_patch = 
face_patch.resize((patch_w, patch_h), Image.Resampling.LANCZOS) - blur_radius = max(3, size // 128) - face_mask = _make_soft_face_mask(patch_w, patch_h, blur_radius) - - fused = global_img.copy() - fused.paste(face_patch, (face_box[0], face_box[1]), mask=face_mask) - global_img = fused - - return global_img - - -# ----------------------------- -# 5. Gradio UI -# ----------------------------- -DESCRIPTION = """ -# 真人照片一键风格化(MindSpore 2.7.0 + MindNLP 0.5.1) -建议: -- `人物特征保留增强`:默认开启 -- `strength`:0.30 ~ 0.52 更像本人;数值越大,风格越强、但越容易不像本人 -- 卡通插画建议从 `0.46 ~ 0.56` 起步,吉卜力建议从 `0.34 ~ 0.44` 起步 -- `steps`:20 ~ 35 -- 半身人像建议优先用 `640` 或 `768`,脸部会更清楚 -""" - - -def _ui_generate(img, style, strength, steps, guidance, seed, size, preserve_identity): - try: - out_img = generate( - image=img, - style_name=style, - strength=float(strength), - steps=int(steps), - guidance_scale=float(guidance), - seed=int(seed), - size=int(size), - preserve_identity=bool(preserve_identity), - ) - if out_img is None: - raise RuntimeError("generate() returned None") - return out_img - except Exception as e: - traceback.print_exc() - raise gr.Error(str(e)) - - -with gr.Blocks() as demo: - gr.Markdown(DESCRIPTION) - - with gr.Row(): - inp = gr.Image(type="pil", label="上传真人照片") - out = gr.Image(type="pil", label="生成结果") - - with gr.Row(): - style = gr.Dropdown(list(STYLE_PRESETS.keys()), value=DEFAULT_STYLE, label="风格") - size = gr.Dropdown([512, 640, 768], value=640, label="输出尺寸(越大越慢)") - - with gr.Row(): - preserve_identity = gr.Checkbox(value=True, label="人物特征保留增强(推荐开启)") - strength = gr.Slider(0.20, 0.75, value=0.42, step=0.01, label="strength(风格强度)") - steps = gr.Slider(10, 50, value=25, step=1, label="steps(推理步数)") - - with gr.Row(): - guidance = gr.Slider(1.0, 12.0, value=7.5, step=0.5, label="guidance_scale(CFG)") - seed = gr.Number(value=0, precision=0, label="seed(0=随机)") - - btn = gr.Button("生成", variant="primary") - btn.click( - _ui_generate, - inputs=[inp, style, strength, steps, guidance, seed, 
size, preserve_identity], - outputs=[out], - ) - - -if __name__ == "__main__": - demo.queue(max_size=20).launch( - server_name="0.0.0.0", - server_port=int(os.getenv("PORT", "7860")), - show_error=True, - )