wangfuyun commited on
Commit
ee650ee
·
verified ·
1 Parent(s): e2b9036

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +107 -3
README.md CHANGED
@@ -1,3 +1,107 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: runwayml/stable-diffusion-v1-5
4
+ tags:
5
+ - text-to-image
6
+ - diffusion-models
7
+ - stable-diffusion
8
+ - diffusers
9
+ - image-generation
10
+ - fast-sampling
11
+ library_name: diffusers
12
+ pipeline_tag: text-to-image
13
+ ---
14
+ # Image Diffusion Preview with Consistency Solver (Google DeepMind)
15
+
16
+ [paper](https://arxiv.org/abs/2512.13592) [code](https://github.com/G-U-N/consolver) [huggingface](https://huggingface.co/papers/2512.13592) [model](https://huggingface.co/wangfuyun/consolver)
17
+
18
+ # Quick Start
19
+
20
+ ```python
21
+ import torch
22
+ from diffusers import StableDiffusionPipeline, DDIMScheduler
23
+ from scheduler_ppo import PPOScheduler # Provided in this repo
24
+ from huggingface_hub import hf_hub_download
25
+
26
+ # Download the trained factor_net checkpoint
27
+ factor_net_path = hf_hub_download(
28
+ repo_id="wangfuyun/consolver",
29
+ filename="model.ckpt"
30
+ )
31
+
32
+ model_id = "runwayml/stable-diffusion-v1-5"
33
+ prompt = "an astronaut riding a horse on the moon, highly detailed, 8k"
34
+ num_inference_steps = 8
35
+ guidance_scale = 3.0
36
+ seed = 43
37
+ height = width = 512
38
+
39
+ def load_pipeline(scheduler_type="ddim"):
40
+     if scheduler_type == "ppo":
41
+         scheduler = PPOScheduler(
42
+             beta_end=0.012,
43
+             beta_schedule="scaled_linear",
44
+             beta_start=0.00085,
45
+             num_train_timesteps=1000,
46
+             steps_offset=1,
47
+             timestep_spacing="trailing",
48
+             order_dim=4,
49
+             scaler_dim=0,
50
+             use_conv=False,
51
+             factor_net_kwargs=dict(embedding_dim=64, hidden_dim=256, num_actions=11),
52
+         )
53
+     else:
54
+         scheduler = DDIMScheduler.from_pretrained(model_id, subfolder="scheduler", timestep_spacing="trailing")
55
+
56
+     pipe = StableDiffusionPipeline.from_pretrained(
57
+         model_id,
58
+         scheduler=scheduler,
59
+         safety_checker=None,
60
+         # torch_dtype=torch.float16, # Uncomment for GPU memory savings
61
+     ).to("cuda")
62
+
63
+     if scheduler_type == "ppo" and factor_net_path:
64
+         weight = torch.load(factor_net_path, map_location="cpu")
65
+         pipe.scheduler.factor_net.load_state_dict(weight)
66
+         pipe.scheduler.factor_net.to("cuda")
67
+
68
+     return pipe
69
+
70
+ generator = torch.Generator("cuda").manual_seed(seed)
71
+
72
+ # DDIM baseline (8 steps)
73
+ pipe_ddim = load_pipeline("ddim")
74
+ image_ddim = pipe_ddim(prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale,
75
+ generator=generator, height=height, width=width).images[0]
76
+ image_ddim.save("ddim_result.jpg")
77
+
78
+ # ConSolver (8 steps)
79
+ pipe_consolver = load_pipeline("ppo")
80
+ image_consolver = pipe_consolver(prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale,
81
+ generator=generator, height=height, width=width).images[0]
82
+ image_consolver.save("consolver_result.jpg")
83
+ ```
84
+
85
+
86
+
87
+
88
+ <div align="center">
89
+ <table>
90
+ <tr>
91
+ <td align="center">
92
+ <img src="https://github.com/user-attachments/assets/35f5f99a-ca5f-4919-82cf-04a67a2dbe13" alt="DDIM" width="80%" />
93
+ </td>
94
+ <td align="center">
95
+ <img src="https://github.com/user-attachments/assets/6428a663-b488-4ecc-b79c-4fcb431d5630" alt="Consistency Solver" width="80%" />
96
+ </td>
97
+ </tr>
98
+ <tr>
99
+ <td align="center">
100
+ <em>DDIM</em>
101
+ </td>
102
+ <td align="center">
103
+ <em>ConsistencySolver</em>
104
+ </td>
105
+ </tr>
106
+ </table>
107
+ </div>