开源王者 Qwen-Image-Edit：图生图实战代码独家分享

通义千问最新推出的图像编辑模型Qwen-Image-Edit的使用体验，该模型支持图生图功能，效果强大且支持家用4090显卡运行。模型开源地址和示例代码均已给出。

烤鸭的世界我们不懂

296人浏览 · 2025-09-05 07:30:00

烤鸭的世界我们不懂 · 2025-09-05 07:30:00 发布

大家好，我是烤鸭：

按照这个开源的更新速度，文章都写不过来，还没试用呢，下一个就出来了。上篇文章 https://blog.csdn.net/Angry_Mills/article/details/150151851 说千问图片的图生图很快就出来了，没想到一周就出来了，可怕的更新速度。

介绍

魔搭社区（ModelScope）模型地址：https://www.modelscope.cn/models/Qwen/Qwen-Image-Edit/summary

源码地址：https://github.com/QwenLM/Qwen-Image

看看官方的示例，效果还是非常强的，而且家用的4090就可以跑。
请添加图片描述

环境

linux ubuntu

python 3.13

显卡是4090 24G * 3

使用优化后的官方示例代码，显存分配使用均衡（balanced）模式，单卡24G跑一张图片大概在8min。

展示几张效果

这里和豆包生成做一个对比，能看出来qwen-image应该是用的doubao结果做的训练数据，某些场景下相似度比较接近。但是文本较多的情况下，qwen-image明显优化做得更好。

prompt：跑车变成红色的

原图：

请添加图片描述

豆包：

请添加图片描述

千问：

在这里插入图片描述

prompt：背景变成城市

原图：

在这里插入图片描述

豆包：

在这里插入图片描述

千问：

在这里插入图片描述

prompt：在长城道上加几个行走的路人

原图：

在这里插入图片描述

豆包：

在这里插入图片描述

千问：

在这里插入图片描述

源码

调用的shell脚本

# Sets the PyTorch CUDA allocator option, activates the project virtualenv and
# runs qwen-image.py. Because --image is supplied, the script takes its
# image-editing branch; without --image it would run text-to-image instead.
# NOTE(review): in the script below the edit branch unpacks --resolution but
# does not pass width/height to the pipeline, so 960,544 appears to have no
# effect on the edited output — confirm before relying on it.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True && source /data/sharedpvc/venv/qwen-image/venv/bin/activate && /data/sharedpvc/venv/qwen-image/venv/bin/python /data/awesome-wan2/mutilparts/qwen-image.py --resolution 960,544 --num_images 1 --output /data/sharedpvc/qwen-image/jU5I2bzGT13wF4CmYI9b7A.png --image /data/downloaded_images/db17edf7-3f5e-4310-b891-1d1516dbe8cf.png --prompt "背景变成城市"

源码(整合了qwen-image和qwen-image-edit)：

import os
import torch
from modelscope import DiffusionPipeline
from accelerate import Accelerator
import argparse
import json
from logging_config import KAFKA_LOGGER
import time  # 需要导入time模块

from PIL import Image
from diffusers import QwenImageEditPipeline

def generate_image_by_qwen(
        resolution,  # (width, height) tuple, e.g. (1280, 720)
        output_path,  # explicit file path for the (first) generated image
        prompt,  # text prompt / edit instruction
        use_gpus=None,  # GPU ids to expose, e.g. [0, 1, 2]; None = all available
        num_images=1,  # number of images to generate per call
        output_dir="/data/sharedpvc",  # directory used when output_path is empty
        image=''
):
    """Generate an image with Qwen-Image, or edit one with Qwen-Image-Edit.

    When ``image`` is a non-empty path the Qwen-Image-Edit pipeline is used
    and ``prompt`` is the edit instruction; otherwise the Qwen-Image
    text-to-image pipeline is used.

    Args:
        resolution: ``(width, height)`` tuple. Passed to the text-to-image
            pipeline only; the edit pipeline is called without explicit
            width/height, exactly as in the original script.
        output_path: File path for the result. If more than one image is
            produced, the first goes to ``output_path`` and the others to
            ``<stem>_<n><ext>`` next to it. If empty, images are written to
            ``output_dir`` as ``output_<n>.png``.
        prompt: Prompt text. In text-to-image mode a quality suffix is
            appended (Chinese or English depending on the prompt).
        use_gpus: List of physical GPU ids to make visible through
            ``CUDA_VISIBLE_DEVICES``. ``None``/empty uses every visible GPU.
        num_images: How many images to request from the pipeline.
        output_dir: Fallback directory, only used when ``output_path`` is empty.
        image: Path of the source image for edit mode; empty for text-to-image.

    Returns:
        ``(True, [saved paths])`` on success, ``(False, error message)`` on
        any failure inside the generation/saving phase.
    """
    # Environment tweaks kept from the original script (described there as a
    # workaround for inter-GPU communication problems).
    os.environ["NCCL_P2P_DISABLE"] = "1"
    os.environ["NCCL_IB_DISABLE"] = "1"
    os.environ["TORCH_CUDNN_V8_API_ENABLED"] = "1"

    # Restrict the visible GPUs if the caller asked for specific ones.
    if isinstance(use_gpus, list) and use_gpus:
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, use_gpus))
        num_gpus = len(use_gpus)
    else:
        num_gpus = torch.cuda.device_count()

    accelerator = Accelerator()

    device_map = "balanced"
    # Once CUDA_VISIBLE_DEVICES is applied the visible devices are renumbered
    # 0..n-1, so the memory budget must be keyed by those logical indices and
    # not by the physical ids the caller passed in.
    max_memory = {idx: "23GiB" for idx in range(num_gpus)} if num_gpus > 1 else None

    negative_prompt = "low quality, blurry, text distortion"
    pipe = None
    results = None
    output_paths = []
    try:
        # CLI callers may hand over the count as a string.
        num_images = int(num_images)
        width, height = resolution

        # Prepare the destination up front so an unwritable location fails
        # before minutes of generation rather than after.
        if output_path:
            parent_dir = os.path.dirname(output_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
        elif not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)
            print(f"创建输出目录: {output_dir}")

        if image:
            # ---- image editing mode ----
            local_model_dir = "/data/sharedpvc/modelscope/hub/models/Qwen/Qwen-Image-Edit"
            pipe = QwenImageEditPipeline.from_pretrained(
                local_model_dir,
                torch_dtype=torch.bfloat16,
                device_map=device_map,
                max_memory=max_memory,  # same per-GPU budget as text-to-image
                trust_remote_code=True
            )
            print("pipeline loaded")
            pipe.set_progress_bar_config(disable=None)
            KAFKA_LOGGER.info(f"prompt:{prompt}, pipe inited succeed")

            # Load fully into memory and release the file handle right away.
            with Image.open(image) as source_file:
                source_image = source_file.convert("RGB")

            inputs = {
                "image": source_image,
                "prompt": prompt,
                "generator": torch.manual_seed(0),
                "true_cfg_scale": 4.0,
                "negative_prompt": negative_prompt,
                "num_inference_steps": 50,
                "num_images_per_prompt": num_images,
            }

            with torch.inference_mode():
                results = pipe(**inputs)
        else:
            # ---- text-to-image mode ----
            pipe = DiffusionPipeline.from_pretrained(
                "/data/sharedpvc/modelscope/hub/models/Qwen/Qwen-Image",
                torch_dtype=torch.bfloat16,
                device_map=device_map,
                max_memory=max_memory,
                trust_remote_code=True
            )
            KAFKA_LOGGER.info(f"prompt:{prompt}, pipe inited succeed")

            # Best-effort attention optimisation: try flash-attention, then
            # xformers, and carry on without either if neither is available.
            try:
                pipe.enable_flash_attention_2()
            except (AttributeError, ImportError):
                try:
                    pipe.enable_xformers_memory_efficient_attention()
                except (AttributeError, ImportError):
                    print("警告：未启用高效注意力优化，速度可能较慢")
            KAFKA_LOGGER.info(f"prompt:{prompt}, flash_attention inited succeed")

            # Append a quality-boosting suffix in the prompt's language
            # (any CJK unified ideograph selects the Chinese suffix).
            positive_magic = {"zh": "超清，4K，电影级构图", "en": "Ultra HD, 4K, cinematic composition."}
            if any("\u4e00" <= c <= "\u9fff" for c in prompt):
                prompt += positive_magic["zh"]
            else:
                prompt += positive_magic["en"]

            with torch.no_grad():
                with accelerator.autocast():
                    results = pipe(
                        prompt=prompt,
                        negative_prompt=negative_prompt,
                        width=width,
                        height=height,
                        num_inference_steps=30,
                        true_cfg_scale=3.0,
                        num_images_per_prompt=num_images,
                        generator=torch.Generator(device=accelerator.device).manual_seed(42)
                    )

        generated = results.images
        print(f"图像生成完成 size: {len(generated)}")
        KAFKA_LOGGER.info(f"图像生成完成 size: {len(generated)}")

        if output_path:
            # First image keeps the requested name; extras get a numeric suffix.
            stem, ext = os.path.splitext(output_path)
            for idx, picture in enumerate(generated, start=1):
                target = output_path if idx == 1 else f"{stem}_{idx}{ext}"
                picture.save(target)
                output_paths.append(target)
        else:
            for idx, picture in enumerate(generated, start=1):
                target = os.path.join(output_dir, f"output_{idx}.png")
                picture.save(target)
                output_paths.append(target)
                KAFKA_LOGGER.info(f"图像 {idx}/{len(generated)} 已保存至: {target}")

        KAFKA_LOGGER.info(f"所有 {len(output_paths)} 张图像已成功生成并保存至: {output_paths}")
        return True, output_paths

    except Exception as e:
        print(f"生成过程出错: {str(e)}")
        # One log entry carrying both the message and the traceback.
        KAFKA_LOGGER.error(f"提示词：{prompt} 生成过程出错: {type(e).__name__} - {str(e)}", exc_info=True)
        return False, str(e)
    finally:
        # Drop our references first; empty_cache() can only return memory
        # that is no longer held by live tensors.
        pipe = None
        results = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()


def parse_arguments(argv=None):
    """Parse the command-line options of the image generation script.

    Every option has a default, so the script can be run without arguments.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        argparse.Namespace with ``image``, ``resolution``, ``output``,
        ``output_dir``, ``num_images`` (int), ``prompt``, ``gpus`` and
        ``steps`` (int).
    """
    parser = argparse.ArgumentParser(description='使用Qwen-Image模型生成图像')

    # Source image; when given, the script runs in image-editing mode.
    parser.add_argument('--image',
                        help='编辑的图片地址')
    parser.add_argument('--resolution',
                        default="1280,720",
                        help='图像分辨率，格式为"宽度,高度"，例如"1280,720"，默认值为"1280,720"')
    parser.add_argument('--output',
                        default='./output.png',
                        help='生成图像的保存路径，例如"output.png"')
    parser.add_argument('--output_dir',
                        default='/data/sharedpvc',
                        help='生成图像的保存目录,用于多张图片，例如"/data/sharedpvc"')
    # type=int so a value given on the command line is not left as a string.
    parser.add_argument('--num_images',
                        type=int,
                        default=1,
                        help='生成图像数量,用于多张图片，例如 4')
    parser.add_argument('--prompt',
                        default='A coffee shop entrance features a chalkboard sign reading "Qwen Coffee 😊 $2 per cup," with a neon light beside it displaying "通义千问". Next to it hangs a poster showing a beautiful Chinese woman, and beneath the poster is written "π≈3.1415926-53589793-23846264-33832795-02384197". Ultra HD, 4K, cinematic composition ',
                        help='图像生成的提示词')

    # Comma-separated GPU ids. The default is GPU 1; an empty string makes the
    # entry point fall back to all available GPUs.
    parser.add_argument('--gpus',
                        default='1',
                        help='要使用的显卡编号，用逗号分隔，例如"0,1,2"，默认值为"1"；传入空字符串则使用所有可用显卡')
    # NOTE(review): --steps is parsed here, but the visible __main__ block
    # never forwards it to the generation function — confirm whether it
    # should be wired through.
    parser.add_argument('--steps',
                        type=int,
                        default=30,
                        help='推理步数，默认值为30，值越大生成质量可能越高但速度越慢')

    return parser.parse_args(argv)

if __name__ == "__main__":
    # Script entry point: parse the CLI, run one generation/edit job, print
    # the saved file list as JSON and always log the elapsed time.
    start_time = time.time()
    # Pre-bind so the finally block can log even if argument parsing exits
    # (e.g. --help or an invalid option) before args is assigned.
    args = None
    exit_code = 0
    try:
        args = parse_arguments()

        # "W,H" -> (W, H)
        width, height = map(int, args.resolution.split(','))
        resolution = (width, height)

        # "0,1,2" -> [0, 1, 2]; an empty value leaves use_gpus as None,
        # which lets the generation function use every available GPU.
        use_gpus = None
        if args.gpus:
            try:
                use_gpus = list(map(int, args.gpus.split(',')))
            except ValueError:
                print("错误：显卡编号格式不正确，请使用逗号分隔的数字，例如'0,1,2'")
                # SystemExit is not caught by `except Exception` below, so
                # this still terminates the script after `finally` has run.
                raise SystemExit(1)

        success, payload = generate_image_by_qwen(
            resolution=resolution,
            output_path=args.output,
            output_dir=args.output_dir,
            num_images=int(args.num_images),
            prompt=args.prompt,
            use_gpus=use_gpus,
            image=args.image
        )
        # Output format is unchanged: on success "files" is the list of saved
        # paths, on failure it carries the error message string.
        result = json.dumps({"files": payload})
        print(f"generate end:{result}")
        if not success:
            # Surface the failure to the calling shell instead of exiting 0.
            exit_code = 1
    except Exception as e:
        error_msg = f"生成图片过程中发生错误: {str(e)}"
        KAFKA_LOGGER.error(error_msg, exc_info=True)
        exit_code = 1
    finally:
        # Runs on success, failure and early exit alike.
        end_time = time.time()
        elapsed_time = end_time - start_time
        KAFKA_LOGGER.info(f"generate end args:{args}, cost: {elapsed_time:.2f}秒")

    if exit_code:
        raise SystemExit(exit_code)

显存占用情况（看单卡的）：
在这里插入图片描述

总结

单卡24G的4090，1张图片编辑时间大概为15分钟。有点慢，不过免费的还说啥。

现在开源的家庭成员是越来越多了，而且功能越来越强大。

当你用sd还吭哧吭哧训练中文的时候，人家qwen-image直接KO了，根本没法玩。

现在更适合利用已有模型，做简单的训练，不要投入太多的人力物力，快速使用并接入业务是王道，不过这也注定了无法做复杂的业务。

还是那句话，每天更新实在太快了，用不了多久，多模态模型的平价时代也会来的。

下一篇写wan2.2和qwen-image的lora微调。

本来是打算写wan2.2，qwen-image-edit开源的有点快，或者说我有点懒了…AI最先替代的肯定咱这帮懒人

文章参考

https://zhuanlan.zhihu.com/p/1941869851632181759

https://zhuanlan.zhihu.com/p/1941561330537600404

https://blog.csdn.net/weixin_41446370/article/details/150521982

智联教育社区

更多推荐

01-Hadoop简介与生态系统

Hadoop是一个开源分布式框架，用于处理大规模数据集。其核心包括分布式文件系统HDFS和计算框架MapReduce，具有高可靠性（自动数据备份）、可扩展性（支持PB级数据）和成本效益（使用商用硬件）等优势。Hadoop历经多个版本演进，从1.x的基础架构发展到3.x支持云环境。主要组件HDFS通过分布式存储实现数据冗余和容错，而MapReduce则提供并行计算能力。该框架适用于各种数据类型，是处

智联教育社区

Camunda流程引擎的安装使用

接下来，我们添加一个带有 main 方法的应用程序类，该方法将成为启动 Spring Boot 应用程序的入口点。使用camunda开源工作流引擎有：通过docker运行、使用springboot集成、部署camunda发行包、基于源代码编译运行等多种方式。文本重点介绍如何在Spring Boot应用程序中如何集成Camunda Platform开源流程平台，这也是项目中最为常见的一种使用方式。时

智联教育社区

ragflow MCP 调用核心提示词解析：逻辑闭环与优化方向

本文探讨了开源项目ragflow中MCP调用体系的两个关键提示词。高级推理代理提示词通过分解复杂问题为可验证步骤，确保执行精准，其优势在于任务拆解颗粒度精准、工具使用规范明确和结果导向流程设计，但存在缺乏搜索优先级引导、信息足够判断标准不明确等问题。信息提取模块提示词专注于从搜索结果中提取关键信息，优势在于严格限定提取范围、输出格式二元化和目标导向流程设计，但需优化权威来源优先规则、部分相关信息处