"""Inference examples for HiDream-O1-Image-Dev with DiffSynth.

The script runs, in order:

1. Text-to-image generation.
2. Image-to-image editing of the image produced in step 1.
3. Three multi-reference subject-driven personalization variants (plain,
   with skeleton, with layout boxes), using reference images downloaded from
   the ``DiffSynth-Studio/diffsynth_example_dataset`` dataset.

Every result is written to a ``.jpg`` file in the current working directory.
"""

import os

import torch
from PIL import Image
from modelscope import dataset_snapshot_download

from diffsynth.core.loader.config import ModelConfig
from diffsynth.diffusion import HiDreamO1FlashScheduler
from diffsynth.pipelines.hidream_o1_image import HiDreamO1ImagePipeline


def load_reference_images(directory: str) -> list:
    """Load every ``.jpg`` file in *directory* as an RGB ``PIL.Image``.

    Files are visited in sorted file-name order. ``os.listdir`` alone returns
    entries in arbitrary order, which would make the order of the reference
    images (and therefore the generated result) vary between machines.

    Args:
        directory: Folder containing the reference images.

    Returns:
        A list of RGB images, one per ``.jpg`` file, ordered by file name.
    """
    images = []
    for file_name in sorted(os.listdir(directory)):
        if not file_name.endswith(".jpg"):
            continue
        # ``convert`` returns an independent image, so the source file can be
        # closed as soon as the conversion is done.
        with Image.open(os.path.join(directory, file_name)) as source:
            images.append(source.convert("RGB"))
    return images


pipe = HiDreamO1ImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="HiDream-ai/HiDream-O1-Image-Dev", origin_file_pattern="model-*.safetensors"),
    ],
    processor_config=ModelConfig(model_id="HiDream-ai/HiDream-O1-Image-Dev", origin_file_pattern="./"),
)
pipe.scheduler = HiDreamO1FlashScheduler(noise_scale_start=7.5, noise_scale_end=7.5, noise_clip_std=2.5)

# Sampling arguments shared by every example below; only the prompt, the seed
# and the reference inputs differ from call to call.
generation_kwargs = dict(
    cfg_scale=1.0,
    height=2048,
    width=2048,
    num_inference_steps=28,
    model_type="dev",
    noise_scale=7.5,
)

# Text-to-Image
image = pipe(
    prompt=(
        "medium shot, eye-level, front view. "
        "A woman is seated in an ornate bedroom, illuminated by candlelight, with a calm and composed expression. "
        "The subject is a young woman with fair skin, light brown hair styled in an updo with loose tendrils framing her face, and blue eyes. "
        "She wears a cream-colored satin robe with delicate floral embroidery and lace trim along the neckline. "
        "Her ears are adorned with pearl drop earrings. "
        "She is seated on a bed with a dark, intricately carved wooden headboard. "
        "To her left, a wooden nightstand holds three lit white candles and a candelabra with multiple lit candles in the background. "
        "The bed is covered with patterned pillows and a dark, textured blanket. "
        "The walls are paneled with dark wood and feature a large, ornate tapestry with muted earth tones. "
        "The lighting creates soft highlights on her face and robe, with warm shadows cast across the room."
    ),
    seed=42,
    **generation_kwargs,
)
image.save("image.jpg")

# Image-to-Image: edits the image generated by the text-to-image step above.
image = pipe(
    prompt="change her clothes to blue",
    seed=43,
    edit_image=[image],
    **generation_kwargs,
)
image.save("image_edit.jpg")

# Download the reference images used by the personalization examples.
dataset_snapshot_download(
    dataset_id="DiffSynth-Studio/diffsynth_example_dataset",
    local_dir="data/diffsynth_example_dataset",
    allow_file_pattern="hidream_o1_image/HiDream-O1-Image-Dev/*",
)

# Multi-Reference Subject-Driven Personalization
ref_image_dir = "./data/diffsynth_example_dataset/hidream_o1_image/HiDream-O1-Image-Dev/IP/"
ref_images = load_reference_images(ref_image_dir)
prompt = (
    "A young boy with blonde hair stands on steps wearing light blue jeans, a white t-shirt with logo, and blue and white sneakers. "
    "He wears a brown cord necklace with beads, a black wristwatch with digital display, and carries a yellow fanny pack with white zipper. "
    "In his hand is a red boxing glove with white top, a teal plastic toy car, and a plastic toy figure of Captain America. "
    "He wears a straw hat with cream band. "
    "Natural light illuminates the scene."
)
image = pipe(
    prompt=prompt,
    seed=43,
    edit_image=ref_images,
    **generation_kwargs,
)
image.save("ip.jpg")

# Multi-Reference Subject-Driven Personalization with Skeleton
ref_image_dir = "./data/diffsynth_example_dataset/hidream_o1_image/HiDream-O1-Image-Dev/IP_skeleton/"
ref_images = load_reference_images(ref_image_dir)
prompt = "Create a realistic try-on image of the person wearing the provided clothing."
image = pipe(
    prompt=prompt,
    seed=43,
    edit_image=ref_images,
    **generation_kwargs,
)
image.save("ip_skeleton.jpg")

# Multi-Reference Subject-Driven Personalization with Layout:
# relative coordinates [x1, x2, y1, y2]
# NOTE(review): presumably each box pairs with the reference image at the same
# index; loading the files in sorted order keeps that pairing reproducible.
# Confirm the intended file order against the example dataset.
layout_bboxes = [[0.21, 0.44, 0.49, 0.74], [0.58, 0.80, 0.09, 0.34]]
ref_image_dir = "./data/diffsynth_example_dataset/hidream_o1_image/HiDream-O1-Image-Dev/IP_layout/"
ref_images = load_reference_images(ref_image_dir)
prompt = "City council members pose with relaxed smiles on a sunlit terrace, warm approachable mood, golden hour, cinematic soft glow."
image = pipe(
    prompt=prompt,
    seed=43,
    edit_image=ref_images,
    layout_bboxes=layout_bboxes,
    **generation_kwargs,
)
image.save("ip_layout.jpg")