Spaces: Running on Zero

Ray committed · Commit 4178132
1 Parent(s): 77d6865
Update app and add models via LFS
- {omini → Genfocus}/__init__.py +0 -0
- omini/pipeline/flux_omini.py → Genfocus/pipeline/flux.py +12 -7
- app.py +261 -91
- bokehNet.safetensors +3 -0
- default.safetensors → deblurNet.safetensors +0 -0
- example/female.jpg +3 -0
- requirements.txt +3 -1
{omini → Genfocus}/__init__.py
RENAMED
File without changes
omini/pipeline/flux_omini.py → Genfocus/pipeline/flux.py
RENAMED
@@ -35,12 +35,12 @@ def clip_hidden_states(hidden_states: torch.FloatTensor) -> torch.FloatTensor:
    hidden_states = hidden_states.clip(-65504, 65504)
    return hidden_states

-
-def encode_images(pipeline: FluxPipeline, images: torch.Tensor):
+def encode_images(pipeline: FluxPipeline, images: torch.Tensor,No_preprocess=False):
    """
    Encodes the images into tokens and ids for FLUX pipeline.
    """
-
+    if not No_preprocess:
+        images = pipeline.image_processor.preprocess(images)
    images = images.to(pipeline.device).to(pipeline.dtype)
    images = pipeline.vae.encode(images).latent_dist.sample()
    images = (
@@ -105,6 +105,7 @@ class Condition(object):
        position_scale=1.0,
        latent_mask=None,
        is_complement=False,
+        No_preprocess=False,
    ) -> None:
        self.condition = condition
        self.adapter = adapter_setting
@@ -114,12 +115,17 @@ class Condition(object):
            latent_mask.T.reshape(-1) if latent_mask is not None else None
        )
        self.is_complement = is_complement
+        self.No_preprocess=No_preprocess

    def encode(
        self, pipe: FluxPipeline, empty: bool = False
    ) -> Tuple[torch.Tensor, torch.Tensor, int]:
-
-
+        if isinstance(self.condition, Image.Image):
+            condition_empty = Image.new("RGB", self.condition.size, (0, 0, 0))
+        elif torch.is_tensor(self.condition):
+            H, W = self.condition.shape[-2], self.condition.shape[-1]
+            condition_empty = Image.fromarray(np.zeros((H, W, 3), dtype=np.uint8), "RGB")
+        tokens, ids = encode_images(pipe, condition_empty if empty else self.condition,self.No_preprocess)

        if self.position_delta is not None:
            ids[:, 1] += self.position_delta[0]
@@ -136,7 +142,6 @@ class Condition(object):

        return tokens, ids

-
@contextmanager
def specify_lora(lora_modules: List[BaseTunerLayer], specified_lora):
    # Filter valid lora modules
@@ -259,7 +264,6 @@ def attn_forward(
        with specify_lora((attn.to_out[0],), adapters[i + h2_n]):
            h = attn.to_out[0](h)
        h_out.append(h)
-
    return (h_out, h2_out) if h2_n else h_out


@@ -450,6 +454,7 @@ def transformer_forward(

    return (output,)

+
@torch.no_grad()
def generate(
    pipeline: FluxPipeline,
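
The practical effect of the new No_preprocess flag (used by app.py below) is that a Condition can now wrap a tensor that is already scaled the way the VAE expects, so encode_images() skips pipeline.image_processor.preprocess() for it. A minimal sketch of the two call patterns, assuming the renamed Genfocus.pipeline.flux module is importable; the inputs here are placeholders, not values from the commit:

import torch
from PIL import Image

from Genfocus.pipeline.flux import Condition  # module introduced by this commit

# Illustrative inputs only: a 512x512 RGB photo and a 1x3x512x512 map in [0, 1]
# (the shape app.py uses for its normalized defocus map).
photo = Image.new("RGB", (512, 512))
defocus_map = torch.rand(1, 3, 512, 512).clamp(0, 1)

# Default path: a PIL image is still run through pipeline.image_processor.preprocess()
# inside encode_images() before being encoded by the VAE.
cond_img = Condition(photo, "bokeh")

# New path: No_preprocess=True hands the tensor to the VAE as-is, which is what
# app.py relies on when it encodes the defocus map as a condition.
cond_dmf = Condition(defocus_map, "bokeh", [0, 0], 1.0, No_preprocess=True)
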
app.py
CHANGED
@@ -1,139 +1,309 @@
-import
 import gradio as gr
 import torch
 import numpy as np
-
-from
-import

-# ===
-

-#
 MODEL_ID = "black-forest-labs/FLUX.1-dev"
-

-
-
-dtype = torch.bfloat16
-pipe_flux = FluxPipeline.from_pretrained(MODEL_ID, torch_dtype=dtype)

-
-try:
-    print("🔄 Loading LoRA weights...")
-    pipe_flux.load_lora_weights(".", weight_name=LORA_WEIGHT_NAME, adapter_name="deblurring")
-    pipe_flux.set_adapters(["deblurring"])
-except Exception as e:
-    print(f"⚠️ LoRA Error: {e}")

-

-
 def center_crop_512(img: Image.Image) -> Image.Image:
     w, h = img.size
     target = 512
-
-    # Only resize if the image is smaller than the target size
     if min(w, h) < target:
         scale = target / min(w, h)
         new_w, new_h = int(w * scale), int(h * scale)
         img = img.resize((new_w, new_h), Image.LANCZOS)
         w, h = new_w, new_h
-
-    # Calculate center coordinates
     left = (w - target) // 2
     top = (h - target) // 2
     right = left + target
     bottom = top + target
-
     return img.crop((left, top, right, bottom))

-
-
-
-

-
-

-
-

-

-    condition_1_img = center_crop_512(input_image)
-    # Create a black image for condition 0
     condition_0_img = Image.new("RGB", (512, 512), (0, 0, 0))

-
-

-
-
-

-

-
-
-        height=512,
-        width=512,
-        prompt=prompt,
-        conditions=conditions
-    ).images[0]

-

-#
 css = """
-#col-container { margin: 0 auto; max-width:
 """

-
-
-
-
-
-
-
-

 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        #
-        gr.Markdown("# 📷 Genfocus DeblurNet Demo")
-
-        # Description
-        gr.Markdown("""
-        ### Description
-        This demo showcases the functionality of our **first-stage defocus deblurring**.
-
-        ⚠️ **Note**: For demonstration purposes, input images will be automatically **Center Cropped to 512x512**.
-        """)

         with gr.Row():
-            with gr.Column():
-
-
-
-
-
-
-
-
-
-
-
-
-
             )

-
-
-
-
-
-    )

 if __name__ == "__main__":
     demo.launch()
+import os
+import cv2
 import gradio as gr
 import torch
 import numpy as np
+import tempfile
+from PIL import Image, ImageDraw
+from skimage import color, img_as_float32, img_as_ubyte
+
+# GPU decorator specific to Hugging Face Spaces
+import spaces
+from huggingface_hub import login

+# === Import Logic ===
+# Make sure the Genfocus and depth_pro folders have been uploaded to the Space root directory
+try:
+    from Genfocus.pipeline.flux import Condition, generate, seed_everything
+    print("✅ Loaded Condition/generate from Genfocus.pipeline.flux")
+except ImportError:
+    raise RuntimeError("❌ Cannot find 'Genfocus'. Please upload the folder to the Space.")
+
+import depth_pro

+# ==========================================
+# 2. Global Settings
+# ==========================================
 MODEL_ID = "black-forest-labs/FLUX.1-dev"
+# Assume the .safetensors files live in the repository root
+DEBLUR_LORA_PATH = "."
+DEBLUR_WEIGHT_NAME = "deblurNet.safetensors"
+BOKEH_LORA_DIR = "."
+BOKEH_WEIGHT_NAME = "bokehNet.safetensors"

+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

+print(f"🚀 Device detected: {device}")

+# Global variable initialization (lazy loading to save startup time)
+pipe_flux = None
+depth_model = None
+depth_transform = None
+current_adapter = None

+def load_models():
+    """Load the models on first run"""
+    global pipe_flux, depth_model, depth_transform
+
+    if pipe_flux is None:
+        print("🔄 Loading FLUX pipeline...")
+        # Note: FLUX.1-dev requires HF token permissions
+        pipe_flux = FluxPipeline.from_pretrained(MODEL_ID, torch_dtype=dtype)
+        if device == "cuda":
+            pipe_flux.to("cuda")
+
+    if depth_model is None:
+        print("🔄 Loading Depth Pro model...")
+        try:
+            checkpoint_path = hf_hub_download(
+                repo_id=WEIGHTS_REPO_ID,
+                filename=DEPTH_FILENAME,
+                repo_type="model"
+            )
+            print(f"📂 Depth checkpoint cached at: {checkpoint_path}")
+
+            depth_model, depth_transform = depth_pro.create_model_and_transforms(
+                device=device,
+                checkpoint_path=checkpoint_path
+            )
+
+            if device == "cuda":
+                depth_model.eval().to("cuda")
+            else:
+                depth_model.eval()
+            print("✅ Depth Pro loaded.")
+        except Exception as e:
+            print(f"❌ Failed to load Depth Pro: {e}")
+
+# ==========================================
+# 3. Helper Functions
+# ==========================================
 def center_crop_512(img: Image.Image) -> Image.Image:
     w, h = img.size
     target = 512
     if min(w, h) < target:
         scale = target / min(w, h)
         new_w, new_h = int(w * scale), int(h * scale)
         img = img.resize((new_w, new_h), Image.LANCZOS)
         w, h = new_w, new_h
     left = (w - target) // 2
     top = (h - target) // 2
     right = left + target
     bottom = top + target
     return img.crop((left, top, right, bottom))

+def switch_lora(target_mode):
+    global pipe_flux, current_adapter
+    if current_adapter == target_mode:
+        return
+    print(f"🔄 Switching LoRA to [{target_mode}]...")
+    pipe_flux.unload_lora_weights()
+    if target_mode == "deblur":
+        try:
+            pipe_flux.load_lora_weights(DEBLUR_LORA_PATH, weight_name=DEBLUR_WEIGHT_NAME, adapter_name="deblurring")
+            pipe_flux.set_adapters(["deblurring"])
+            current_adapter = "deblur"
+        except Exception as e:
+            print(f"❌ Failed to load Deblur LoRA: {e}")
+    elif target_mode == "bokeh":
+        try:
+            pipe_flux.load_lora_weights(BOKEH_LORA_DIR, weight_name=BOKEH_WEIGHT_NAME, adapter_name="bokeh")
+            pipe_flux.set_adapters(["bokeh"])
+            current_adapter = "bokeh"
+        except Exception as e:
+            print(f"❌ Failed to load Bokeh LoRA: {e}")
+
+# ==========================================
+# 4. Processing Logic
+# ==========================================
+
+def preprocess_input_image(raw_img, do_resize):
+    if raw_img is None: return None, None, None
+    print(f"🔄 Preprocessing Input... Resize={do_resize}")
+    img_to_process = raw_img
+    if do_resize:
+        w, h = img_to_process.size
+        scale = 512 / min(w, h)
+        new_w, new_h = int(w * scale), int(h * scale)
+        img_to_process = img_to_process.resize((new_w, new_h), Image.LANCZOS)

+    final_input = center_crop_512(img_to_process)
+    return final_input, final_input, None

+def draw_red_dot_on_preview(clean_img, evt: gr.SelectData):
+    if clean_img is None: return None, None
+    img_copy = clean_img.copy()
+    draw = ImageDraw.Draw(img_copy)
+    x, y = evt.index
+    r = 8
+    draw.ellipse((x-r, y-r, x+r, y+r), outline="red", width=2)
+    draw.line((x-r, y, x+r, y), fill="red", width=2)
+    draw.line((x, y-r, x, y+r), fill="red", width=2)
+    return img_copy, evt.index

+# !!! Key change: add the @spaces.GPU decorator !!!
+# This tells HF Spaces to allocate this function to a GPU when it runs
+@spaces.GPU(duration=120)
+def run_genfocus_pipeline(clean_input_512, click_coords, K_value, cached_latents):
+    global pipe_flux, depth_model
+
+    # Make sure the models are loaded
+    load_models()
+
+    if clean_input_512 is None:
+        raise gr.Error("Please complete Step 1 (Upload Image) first.")
+
+    print("🚀 Starting Genfocus Pipeline...")
+
+    # 1. Run Deblur (Stage 1)
+    switch_lora("deblur")

     condition_0_img = Image.new("RGB", (512, 512), (0, 0, 0))
+    cond0 = Condition(condition_0_img, "deblurring", [0, 32], 1.0)
+    cond1 = Condition(clean_input_512, "deblurring", [0, 0], 1.0)
+
+    seed_everything(42)
+    deblurred_img = generate(
+        pipe_flux, height=512, width=512,
+        prompt="a sharp photo with everything in focus",
+        conditions=[cond0, cond1]
+    ).images[0]
+
+    if K_value == 0:
+        return deblurred_img, cached_latents
+
+    # 2. Run Bokeh (Stage 2)
+    if click_coords is None:
+        click_coords = [256, 256]
+
+    # Depth Estimation
+    try:
+        img_t = depth_transform(deblurred_img)
+        if device == "cuda": img_t = img_t.to("cuda")
+        with torch.no_grad():
+            pred = depth_model.infer(img_t, f_px=None)
+        depth_map = pred["depth"].cpu().numpy().squeeze()
+        safe_depth = np.where(depth_map > 0.0, depth_map, np.finfo(np.float32).max)
+        disp_orig = 1.0 / safe_depth
+        disp = cv2.resize(disp_orig, (512, 512), interpolation=cv2.INTER_LINEAR)
+    except Exception as e:
+        print(f"❌ Depth Error: {e}")
+        return deblurred_img, cached_latents

+    # Defocus Map
+    tx, ty = click_coords
+    tx = min(max(int(tx), 0), 511)
+    ty = min(max(int(ty), 0), 511)

+    disp_focus = float(disp[ty, tx])
+    dmf = disp - np.float32(disp_focus)
+    defocus_abs = np.abs(K_value * dmf)
+    MAX_COC = 100.0
+    defocus_t = torch.from_numpy(defocus_abs).unsqueeze(0).float()
+    cond_map = (defocus_t / MAX_COC).clamp(0, 1).repeat(3,1,1).unsqueeze(0)

+    # Latents
+    if cached_latents is None:
+        seed_everything(42)
+        gen = torch.Generator(device=pipe_flux.device).manual_seed(1234)
+        latents, _ = pipe_flux.prepare_latents(
+            batch_size=1, num_channels_latents=16, height=512, width=512,
+            dtype=pipe_flux.dtype, device=pipe_flux.device, generator=gen, latents=None
+        )
+        current_latents = latents
+    else:
+        current_latents = cached_latents
+
+    # Generate
+    switch_lora("bokeh")
+    cond_img = Condition(deblurred_img, "bokeh")
+    cond_dmf = Condition(cond_map, "bokeh", [0,0], 1.0, No_preprocess=True)

+    seed_everything(42)
+    gen = torch.Generator(device=pipe_flux.device).manual_seed(1234)

+    with torch.no_grad():
+        res = generate(
+            pipe_flux, height=512, width=512,
+            prompt="an excellent photo with a large aperture",
+            conditions=[cond_img, cond_dmf],
+            guidance_scale=1.0, kv_cache=False, generator=gen,
+            latents=current_latents,
+        )
+    generated_bokeh = res.images[0]
+    return generated_bokeh, current_latents

+# ==========================================
+# 5. UI Setup
+# ==========================================
 css = """
+#col-container { margin: 0 auto; max-width: 1400px; }
 """

+base_path = os.getcwd()
+# Simplified example path check
+example_dir = os.path.join(base_path, "example")
+valid_examples = []
+if os.path.exists(example_dir):
+    files = os.listdir(example_dir)
+    for f in files:
+        if f.lower().endswith(('.jpg', '.jpeg', '.png')):
+            valid_examples.append([os.path.join(example_dir, f)])

 with gr.Blocks(css=css) as demo:
+    clean_processed_state = gr.State(value=None)
+    click_coords_state = gr.State(value=None)
+    latents_state = gr.State(value=None)
+
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("# 📷 Genfocus Pipeline: Interactive Refocusing (HF Demo)")

         with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### Step 1: Upload & Preprocess")
+                input_raw = gr.Image(label="Raw Input Image", type="pil")
+                resize_chk = gr.Checkbox(label="Resize min edge to 512", value=False)
+
+                if valid_examples:
+                    gr.Examples(examples=valid_examples, inputs=input_raw, label="Examples")
+
+            with gr.Column(scale=1):
+                gr.Markdown("### Step 2: Set Focus & K")
+                focus_preview_img = gr.Image(label="Focus Point Selection", type="pil", interactive=False)
+                with gr.Row():
+                    click_status = gr.Textbox(label="Coords", value="Center", interactive=False, scale=1)
+                    k_slider = gr.Slider(0, 50, value=0, step=1, label="Blur Strength (K)", scale=2)
+                run_btn = gr.Button("✨ Run Genfocus", variant="primary", scale=1)
+
+        with gr.Row():
+            output_img = gr.Image(label="Result", type="pil", interactive=False)
+
+        # Events
+        update_trigger = [input_raw.change, resize_chk.change, input_raw.upload]
+        for trigger in update_trigger:
+            trigger(
+                fn=preprocess_input_image,
+                inputs=[input_raw, resize_chk],
+                outputs=[focus_preview_img, clean_processed_state, latents_state]
+            )
+
+        focus_preview_img.select(
+            fn=draw_red_dot_on_preview,
+            inputs=[clean_processed_state],
+            outputs=[focus_preview_img, click_coords_state]
+        ).then(
+            fn=lambda x: f"x={x[0]}, y={x[1]}",
+            inputs=[click_coords_state],
+            outputs=[click_status]
         )

+        run_btn.click(
+            fn=run_genfocus_pipeline,
+            inputs=[clean_processed_state, click_coords_state, k_slider, latents_state],
+            outputs=[output_img, latents_state]
+        )

 if __name__ == "__main__":
+    # HF Spaces does not need server_name or allowed_paths
     demo.launch()
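
The Blur Strength slider K drives a simple defocus model inside run_genfocus_pipeline: Depth Pro's metric depth is inverted to disparity, the disparity at the clicked pixel defines the focal plane, and the per-pixel defocus is |K · (disparity − disparity at focus)|, normalized by a 100-unit circle-of-confusion cap before being handed to the bokeh LoRA as a condition map. A self-contained sketch of that map construction with synthetic depth (no model weights required); variable names mirror app.py, the depth values are made up:

import numpy as np
import torch

K_value = 20.0          # slider value; app.py returns the deblurred image when K == 0
MAX_COC = 100.0         # normalization cap used in app.py
tx, ty = 256, 256       # clicked focus point on the 512x512 preview

# Synthetic stand-in for the Depth Pro output (depth in meters).
depth_map = np.random.uniform(0.5, 10.0, size=(512, 512)).astype(np.float32)

# Depth -> disparity, guarding against non-positive depths.
safe_depth = np.where(depth_map > 0.0, depth_map, np.finfo(np.float32).max)
disp = 1.0 / safe_depth

# Signed disparity difference to the focal plane, scaled by K and made absolute.
disp_focus = float(disp[ty, tx])
defocus_abs = np.abs(K_value * (disp - np.float32(disp_focus)))

# Normalized 1x3x512x512 condition map, as passed to Condition(..., No_preprocess=True).
defocus_t = torch.from_numpy(defocus_abs).unsqueeze(0).float()
cond_map = (defocus_t / MAX_COC).clamp(0, 1).repeat(3, 1, 1).unsqueeze(0)
print(cond_map.shape)  # torch.Size([1, 3, 512, 512])
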
bokehNet.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e87c32b368b1af66e2aa5bc8f58c53ebba75b1afae85e8279cb5f9c5c608d13
+size 463703368
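
These three lines are a Git LFS pointer, not the roughly 460 MB weight file itself; the actual bokehNet.safetensors payload is only materialized when LFS content is pulled, which is what pipe_flux.load_lora_weights(...) in app.py expects to find in the repository root. A small, hypothetical helper (not part of this commit) to check whether a local checkout has the real weights or only the pointer stub:

import os

def is_lfs_pointer(path: str) -> bool:
    """True if the file is a Git LFS pointer stub rather than the real payload."""
    # Pointer files are tiny text files starting with the spec line shown above.
    if os.path.getsize(path) > 1024:
        return False
    with open(path, "rb") as f:
        return f.read(42).startswith(b"version https://git-lfs.github.com/spec/v1")

for name in ("bokehNet.safetensors", "deblurNet.safetensors"):
    if os.path.exists(name):
        state = "LFS pointer only" if is_lfs_pointer(name) else "real weights"
        print(f"{name}: {state}")
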
default.safetensors → deblurNet.safetensors
RENAMED
File without changes
example/female.jpg
ADDED
Git LFS Details
requirements.txt
CHANGED
@@ -6,4 +6,6 @@ protobuf
 sentencepiece
 gradio
 jupyter
-torchao
+torchao
+git+https://github.com/apple/ml-depth-pro.git
+scikit-image
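
The two new entries pull in Apple's Depth Pro package (installed directly from GitHub) and scikit-image, both of which app.py now imports; cv2 is presumably satisfied by an opencv entry in the part of requirements.txt outside this hunk. A hypothetical smoke test, not part of the commit, to confirm the new dependencies resolve after pip install -r requirements.txt:

import importlib

# Modules the updated app.py imports at startup.
for module in ("depth_pro", "skimage", "cv2", "gradio", "torch"):
    try:
        importlib.import_module(module)
        print(f"ok: {module}")
    except ImportError as exc:  # missing or broken dependency
        print(f"missing: {module} ({exc})")
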