Running Qwen Image Edit on Modal
Setup
- Create an account at Modal and install the Modal client in your environment (a minimal sketch of the commands is shown below).
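Assuming a standard Python environment, the setup boils down to something like the following (exact commands may vary slightly with your environment and Modal version):

pip install modal
modal setup  # authenticates your machine against your Modal account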
The Code
Qwen Image Edit Inference
- Put the code in a file named qwen_image_edit.py and deploy the endpoint with modal deploy qwen_image_edit.py.
import io
import os
import time
import modal
# Modal Volume URL configuration
MODAL_WORKSPACE = "drchrislevy" # replace with your modal workspace
MODAL_ENVIRONMENT = "main" # replace with your modal environment
VOLUME_NAME = "qwen_edited_images"
# Image with required dependencies
image = (
modal.Image.debian_slim()
.apt_install(["git"])
.pip_install(
[
"torch",
"torchvision",
"git+https://github.com/huggingface/diffusers",
"transformers",
"accelerate",
"pillow",
"sentencepiece",
"python-dotenv",
"requests",
]
)
)
app = modal.App("qwen-image-editor", image=image)
hf_hub_cache = modal.Volume.from_name("hf_hub_cache", create_if_missing=True)
images_volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True)
@app.cls(
image=image,
gpu="H100",
secrets=[
modal.Secret.from_name("huggingface-secret"),
],
timeout=60 * 5,
volumes={
"/root/.cache/huggingface/hub/": hf_hub_cache,
"/root/edited_images": images_volume,
},
scaledown_window=10 * 60,
max_containers=2,
# enable_memory_snapshot=True, # in alpha # https://modal.com/blog/gpu-mem-snapshots
# experimental_options={"enable_gpu_snapshot": True}
)
@modal.concurrent(max_inputs=1)
class QwenImageEditor:
@modal.enter() # snap=True to try gpu memory snapshot
def setup(self):
"""Load Qwen-Image-Edit model once per container"""
import torch
from diffusers import QwenImageEditPipeline
print("Loading Qwen/Qwen-Image-Edit model...")
self.pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit")
self.pipe.to(torch.bfloat16)
self.pipe.to("cuda")
self.pipe.set_progress_bar_config(disable=None)
print("Model loaded successfully!")
self.images_path = "/root/edited_images"
def _download_image_from_url(self, image_url: str):
"""Download image from URL and convert to PIL Image"""
import requests
from PIL import Image
response = requests.get(image_url)
response.raise_for_status()
image = Image.open(io.BytesIO(response.content)).convert("RGB")
return image
def edit_image(
self,
image_url: str,
prompt: str,
negative_prompt: str = " ",
true_cfg_scale: float = 4.0,
seed: int = 0,
randomize_seed: bool = False,
num_inference_steps: int = 50,
):
import random
import uuid
from datetime import datetime
import numpy as np
import torch
input_image = self._download_image_from_url(image_url)
MAX_SEED = np.iinfo(np.int32).max
if randomize_seed:
seed = random.randint(0, MAX_SEED)
print(f"Editing image {image_url} with prompt: {prompt}")
# Edit image using Qwen-Image-Edit exactly like the original
inputs = {
"image": input_image,
"prompt": prompt,
"generator": torch.manual_seed(seed),
"true_cfg_scale": true_cfg_scale,
"negative_prompt": negative_prompt,
"num_inference_steps": num_inference_steps,
}
with torch.inference_mode():
output = self.pipe(**inputs)
edited_image = output.images[0]
# Create unique filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
unique_id = str(uuid.uuid4())[:8]
filename = f"qwen_edited_{timestamp}_{unique_id}.png"
file_path = os.path.join(self.images_path, filename)
edited_image.save(file_path, format="PNG")
print(f"Edited image saved successfully to volume: {file_path}")
# Generate Modal Volume URL
# Format: https://modal.com/api/volumes/{workspace}/{env}/{volume_name}/files/content?path={filename}
image_url_output = f"https://modal.com/api/volumes/{MODAL_WORKSPACE}/{MODAL_ENVIRONMENT}/{VOLUME_NAME}/files/content?path={filename}"
return {
"original_image_url": image_url,
"image_url": image_url_output,
"prompt": prompt,
"negative_prompt": negative_prompt,
"true_cfg_scale": true_cfg_scale,
"seed": seed,
"num_inference_steps": num_inference_steps,
}
@modal.fastapi_endpoint(
method="POST",
docs=True,
)
def edit_image_endpoint(
self,
image_url: str,
prompt: str,
negative_prompt: str = " ",
true_cfg_scale: float = 4.0,
seed: int = 0,
randomize_seed: bool = False,
num_inference_steps: int = 50,
):
"""Public FastAPI endpoint for image editing"""
return self.edit_image(
image_url=image_url,
prompt=prompt,
negative_prompt=negative_prompt,
true_cfg_scale=true_cfg_scale,
seed=seed,
randomize_seed=randomize_seed,
num_inference_steps=num_inference_steps,
)
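Once modal deploy finishes, Modal prints the public URL of the FastAPI endpoint. As a quick smoke test you can POST to it directly; the sketch below uses a placeholder URL (substitute whatever your deployment prints) and passes the arguments as query parameters, the same way the testing code later in this post calls it:

import requests

# Placeholder URL -- replace with the endpoint URL printed by `modal deploy`
endpoint = "https://<your-workspace>--qwen-image-editor-qwenimageeditor-edit-image-endpoint.modal.run/"
response = requests.post(
    endpoint,
    params={
        "image_url": "https://example.com/input.png",  # any publicly reachable image
        "prompt": "transform this image into Ghibli style",
    },
    timeout=180,
)
print(response.json())  # contains the Modal Volume URL of the edited image and the settings used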
Qwen Image Edit Inference (FAST!)
- The code is nearly identical to the above, except it loads the lightx2v/Qwen-Image-Lightning LoRA.
- I got the inference code snippet from here.
- It uses fewer inference steps: 8 instead of 50.
- Put the code in a file named qwen_image_edit_fast.py and deploy the endpoint with modal deploy qwen_image_edit_fast.py.
import io
import math
import os
import time
import modal
# Modal Volume URL configuration
MODAL_WORKSPACE = "drchrislevy" # replace with your modal workspace
MODAL_ENVIRONMENT = "main" # replace with your modal environment
VOLUME_NAME = "qwen_edited_images"
# Image with required dependencies
image = (
modal.Image.debian_slim()
.apt_install(["git"])
.pip_install(
[
"torch",
"torchvision",
"git+https://github.com/huggingface/diffusers",
"transformers",
"accelerate",
"pillow",
"sentencepiece",
"python-dotenv",
"requests",
"peft",
]
)
)
app = modal.App("qwen-image-editor-fast", image=image)
hf_hub_cache = modal.Volume.from_name("hf_hub_cache", create_if_missing=True)
images_volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True)
@app.cls(
image=image,
gpu="H100",
secrets=[
modal.Secret.from_name("huggingface-secret"),
],
timeout=60 * 5,
volumes={
"/root/.cache/huggingface/hub/": hf_hub_cache,
"/root/edited_images": images_volume,
},
scaledown_window=10 * 60,
max_containers=2,
# enable_memory_snapshot=True, # in alpha # https://modal.com/blog/gpu-mem-snapshots
# experimental_options={"enable_gpu_snapshot": True}
)
@modal.concurrent(max_inputs=1)
class QwenImageEditor:
@modal.enter() # snap=True to try gpu memory snapshot
def setup(self):
"""Load Qwen-Image-Edit model with Lightning acceleration once per container"""
import torch
from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPipeline
print("Loading Qwen/Qwen-Image-Edit model with Lightning acceleration...")
dtype = torch.bfloat16
device = "cuda"
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": math.log(3),
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": math.log(3),
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": None,
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
self.pipe = QwenImageEditPipeline.from_pretrained(
"Qwen/Qwen-Image-Edit", scheduler=scheduler, torch_dtype=dtype
).to(device)
self.pipe.load_lora_weights(
"lightx2v/Qwen-Image-Lightning",
weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors",
)
self.pipe.fuse_lora()
self.pipe.set_progress_bar_config(disable=None)
print("Model loaded successfully with Lightning acceleration!")
self.images_path = "/root/edited_images"
def _download_image_from_url(self, image_url: str):
"""Download image from URL and convert to PIL Image"""
import requests
from PIL import Image
response = requests.get(image_url)
response.raise_for_status()
image = Image.open(io.BytesIO(response.content)).convert("RGB")
return image
def edit_image(
self,
image_url: str,
prompt: str,
negative_prompt: str = " ",
true_cfg_scale: float = 4.0,
seed: int = 0,
randomize_seed: bool = False,
num_inference_steps: int = 8,
):
import random
import uuid
from datetime import datetime
import numpy as np
import torch
input_image = self._download_image_from_url(image_url)
MAX_SEED = np.iinfo(np.int32).max
if randomize_seed:
seed = random.randint(0, MAX_SEED)
print(f"Editing image {image_url} with prompt: {prompt}")
# Edit image using Qwen-Image-Edit exactly like the original
inputs = {
"image": input_image,
"prompt": prompt,
"generator": torch.manual_seed(seed),
"true_cfg_scale": true_cfg_scale,
"negative_prompt": negative_prompt,
"num_inference_steps": num_inference_steps,
}
with torch.inference_mode():
output = self.pipe(**inputs)
edited_image = output.images[0]
# Create unique filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
unique_id = str(uuid.uuid4())[:8]
filename = f"qwen_edited_{timestamp}_{unique_id}.png"
file_path = os.path.join(self.images_path, filename)
edited_image.save(file_path, format="PNG")
print(f"Edited image saved successfully to volume: {file_path}")
# Generate Modal Volume URL
# Format: https://modal.com/api/volumes/{workspace}/{env}/{volume_name}/files/content?path={filename}
image_url_output = f"https://modal.com/api/volumes/{MODAL_WORKSPACE}/{MODAL_ENVIRONMENT}/{VOLUME_NAME}/files/content?path={filename}"
return {
"original_image_url": image_url,
"image_url": image_url_output,
"prompt": prompt,
"negative_prompt": negative_prompt,
"true_cfg_scale": true_cfg_scale,
"seed": seed,
"num_inference_steps": num_inference_steps,
}
@modal.fastapi_endpoint(
method="POST",
docs=True,
)
def edit_image_endpoint(
self,
image_url: str,
prompt: str,
negative_prompt: str = " ",
true_cfg_scale: float = 4.0,
seed: int = 0,
randomize_seed: bool = False,
num_inference_steps: int = 8,
):
"""Public FastAPI endpoint for image editing"""
return self.edit_image(
image_url=image_url,
prompt=prompt,
negative_prompt=negative_prompt,
true_cfg_scale=true_cfg_scale,
seed=seed,
randomize_seed=randomize_seed,
num_inference_steps=num_inference_steps,
)
Testing the Endpoints - Examples
import subprocess
import time
import requests
from PIL import Image
endpoint_url = "https://drchrislevy--qwen-image-editor-qwenimageeditor-edit-imag-10fda9.modal.run/"
endpoint_url_fast = "https://drchrislevy--qwen-image-editor-fast-qwenimageeditor-edit-516e26.modal.run/"
def download_generated_image(result):
time.sleep(5) # wait for volume sync
filename = result["image_url"].split("path=")[-1]
download_command = f"modal volume get qwen_edited_images {filename} ../static_blog_imgs/{filename} --force"
subprocess.run(download_command, shell=True, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
p = f"../static_blog_imgs/{filename}"
return Image.open(p)
def generate_image(endpoint, image_url, prompt, num_inference_steps=None, negative_prompt=" ", true_cfg_scale=4.0, seed=0, randomize_seed=False):
if num_inference_steps is None:
if endpoint == endpoint_url:
num_inference_steps = 50
elif endpoint == endpoint_url_fast:
num_inference_steps = 8
else:
raise ValueError(f"Invalid endpoint: {endpoint}")
response = requests.post(
endpoint,
params={
"image_url": image_url,
"prompt": prompt,
"negative_prompt": negative_prompt,
"true_cfg_scale": true_cfg_scale,
"seed": seed,
"randomize_seed": randomize_seed,
"num_inference_steps": num_inference_steps,
},
timeout=180,
)
res = response.json()
# image = Image.open(requests.get(image_url, stream=True).raw)
# display(image)
download_generated_image(res)
# display(download_generated_image(res))
return res
image_url = (
"https://img.alicdn.com/imgextra/i3/O1CN01XfJ71c1qokTchToKf_!!6000000005543-2-tps-1248-832.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url_fast, image_url, "transform this image into Ghibli style")
image_url = (
"https://img.alicdn.com/imgextra/i3/O1CN01m3Jkqd1UoMb4edofx_!!6000000002564-2-tps-832-1248.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url, image_url, "change the color of the purse to the same color of the jacket", num_inference_steps=10)
image_url = (
"https://img.alicdn.com/imgextra/i3/O1CN01m3Jkqd1UoMb4edofx_!!6000000002564-2-tps-832-1248.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url_fast, image_url, "change the color of the purse to the same color of the jacket", num_inference_steps=8)
image_url = (
"https://img.alicdn.com/imgextra/i3/O1CN01m3Jkqd1UoMb4edofx_!!6000000002564-2-tps-832-1248.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url_fast, image_url, "change the color of the hair to blonde", num_inference_steps=8)
image_url = (
"https://img.alicdn.com/imgextra/i3/O1CN01m3Jkqd1UoMb4edofx_!!6000000002564-2-tps-832-1248.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url, image_url, "change the color of the hair to blonde", num_inference_steps=20)
image_url = (
"https://img.alicdn.com/imgextra/i4/O1CN01hZNlck1mYwLJKmEaI_!!6000000004967-2-tps-1024-1024.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url_fast, image_url, "face to the right")
image_url = (
"https://img.alicdn.com/imgextra/i4/O1CN01hZNlck1mYwLJKmEaI_!!6000000004967-2-tps-1024-1024.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url_fast, image_url, "change the color of the door to yellow")
image_url = (
"https://img.alicdn.com/imgextra/i4/O1CN01hZNlck1mYwLJKmEaI_!!6000000004967-2-tps-1024-1024.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url_fast, image_url, "turn the dog around")
image_url = (
"https://img.alicdn.com/imgextra/i4/O1CN01hZNlck1mYwLJKmEaI_!!6000000004967-2-tps-1024-1024.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
res = generate_image(endpoint_url_fast, image_url, "change the dog to sitting")
image_url = (
"https://img.alicdn.com/imgextra/i4/O1CN01GzeRec1ji9SfxtvRm_!!6000000004581-2-tps-1184-896.png?x-oss-process=image/resize,m_mfit,w_320,h_320"
)
prompt = "Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration."
res = generate_image(endpoint_url_fast, image_url, prompt)
image_url = "https://travelbynatasha.com/wp-content/uploads/2023/09/dsc_2272.jpg"
prompt = "Add a sunset to this image"
res = generate_image(endpoint_url_fast, image_url, prompt)
image_url = "https://travelbynatasha.com/wp-content/uploads/2023/09/dsc_2272.jpg"
prompt = "Add a fishing boat to the image"
res = generate_image(endpoint_url_fast, image_url, prompt)
image_url = "https://travelbynatasha.com/wp-content/uploads/2023/09/dsc_2272.jpg"
prompt = "Add a pod of killer whales swimming out of the cove"
res = generate_image(endpoint_url_fast, image_url, prompt, num_inference_steps=20)