justinblalock87 committed
Commit · 08328de · 1 Parent(s): dbde0cb
Final
Browse files
- .DS_Store +0 -0
- __pycache__/app.cpython-38.pyc +0 -0
- __pycache__/quantize.cpython-38.pyc +0 -0
- app.py +8 -25
- quantize.py +11 -69
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
__pycache__/app.cpython-38.pyc DELETED
Binary file (2.21 kB)
__pycache__/quantize.cpython-38.pyc DELETED
Binary file (3.64 kB)
app.py CHANGED
@@ -1,25 +1,14 @@
-import os
 from typing import Optional
 import gradio as gr
 
 import quantize
 from huggingface_hub import HfApi, login
 
-
-
-def run(model_id: str, model_version: str, additional_args: str, is_private: bool, token: Optional[str] = None) -> str:
+def run(model_id: str, model_version: str, additional_args: str, token: Optional[str] = None) -> str:
     if model_id == "":
-        return "Please …
+        return "Please enter model_id."
     login(token=token)
-
-        api = HfApi(token=token)
-    else:
-        api = HfApi(token=HF_TOKEN)
-    hf_is_private = api.model_info(repo_id=model_id).private
-    if is_private and not hf_is_private:
-        api = HfApi(token=HF_TOKEN)
-
-    print("is_private", is_private)
+    api = HfApi(token=token)
 
     quantize.quantize(api=api, model_id=model_id, model_version=model_version, additional_args=additional_args)
@@ -29,10 +18,6 @@ Simple utility tool to quantize diffusion models and convert them to CoreML.
 """
 
 title="Quantize model and convert to CoreML"
-allow_flagging="never"
-
-def token_text(visible=False):
-    return gr.Text(max_lines=1, label="your_hf_token", visible=True)
 
 with gr.Blocks(title=title) as demo:
     description = gr.Markdown(f"""# {title}""")
@@ -40,11 +25,10 @@ with gr.Blocks(title=title) as demo:
 
     with gr.Row() as r:
         with gr.Column() as c:
-            model_id = gr.Text(max_lines=1, label="…
-            model_version = gr.Text(max_lines=1, label="…
-            additional_args = gr.Text(max_lines=1, label="…
-
-            token = token_text()
+            model_id = gr.Text(max_lines=1, label="ID of output repo")
+            model_version = gr.Text(max_lines=1, label="Version of model to convert", value="stabilityai/sd-turbo")
+            additional_args = gr.Text(max_lines=1, label="Additional Args (optional)")
+            token = gr.Text(max_lines=1, label="Your HuggingFace write token")
         with gr.Row() as c:
             clean = gr.ClearButton()
             submit = gr.Button("Submit", variant="primary")
@@ -52,7 +36,6 @@
         with gr.Column() as d:
             output = gr.Markdown()
 
-
-    submit.click(run, inputs=[model_id, model_version, additional_args, is_private, token], outputs=output, concurrency_limit=1)
+    submit.click(run, inputs=[model_id, model_version, additional_args, token], outputs=output, concurrency_limit=1)
 
 demo.queue(max_size=10).launch(show_api=True)
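The reworked run() is now a thin wrapper: validate model_id, log in with the caller's token, build an HfApi client, and hand off to quantize.quantize(). A minimal sketch of the same flow outside the Gradio UI (the repo name and token below are placeholders, not part of the commit):

    import quantize
    from huggingface_hub import HfApi, login

    token = "hf_xxx"                               # placeholder: a HuggingFace write token
    login(token=token)
    api = HfApi(token=token)
    quantize.quantize(
        api=api,
        model_id="your-username/sd-turbo-coreml",  # placeholder: ID of the output repo
        model_version="stabilityai/sd-turbo",      # the UI's default model version
        additional_args="",                        # empty string -> quantize.py applies its defaults
    )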
quantize.py CHANGED
@@ -1,28 +1,26 @@
-import argparse
-import json
 import os
 import shutil
-from collections import defaultdict
 from tempfile import TemporaryDirectory
-from typing import …
+from typing import List, Optional, Tuple
 import subprocess
-
-import torch
-
-from huggingface_hub import CommitInfo, CommitOperationAdd, Discussion, HfApi, hf_hub_download
+from huggingface_hub import CommitOperationAdd, HfApi
 from huggingface_hub.file_download import repo_folder_name
-from safetensors.torch import _find_shared_tensors, _is_complete, load_file, save_file
 
 ConversionResult = Tuple[List["CommitOperationAdd"], List[Tuple[str, "Exception"]]]
 
-def …(
+def convert_to_core_ml(
     model_id: str, folder: str, token: Optional[str], model_version: str, additional_args: str
 ) -> ConversionResult:
 
     command = ["python3", "-m", "python_coreml_stable_diffusion.torch2coreml", "--model-version", model_version, "-o", folder]
+    if additional_args == "":
+        # Set default args
+        additional_args = "--convert-unet --convert-text-encoder --convert-vae-decoder --attention-implementation SPLIT_EINSUM --quantize-nbits 6"
+
     command.extend(additional_args.split(" "))
 
-    print("Starting conversion")
+    print("Starting conversion: ", command)
 
     subprocess.run(command)
 
@@ -44,62 +42,6 @@ def quantize(
         folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
         os.makedirs(folder)
         try:
-
+            convert_to_core_ml(model_id, folder, token=api.token, model_version=model_version, additional_args=additional_args)
         finally:
-            shutil.rmtree(folder)
+            shutil.rmtree(folder)
-
-
-if __name__ == "__main__":
-    DESCRIPTION = """
-    Simple utility tool to convert automatically some weights on the hub to `safetensors` format.
-    It is PyTorch exclusive for now.
-    It works by downloading the weights (PT), converting them locally, and uploading them back
-    as a PR on the hub.
-    """
-    parser = argparse.ArgumentParser(description=DESCRIPTION)
-    parser.add_argument(
-        "model_id",
-        type=str,
-        help="The name of the model on the hub to convert. E.g. `gpt2` or `facebook/wav2vec2-base-960h`",
-    )
-    parser.add_argument(
-        "--revision",
-        type=str,
-        help="The revision to convert",
-    )
-    parser.add_argument(
-        "--force",
-        action="store_true",
-        help="Create the PR even if it already exists or if the model was already converted.",
-    )
-    parser.add_argument(
-        "-y",
-        action="store_true",
-        help="Ignore safety prompt",
-    )
-    args = parser.parse_args()
-    model_id = args.model_id
-    api = HfApi()
-    if args.y:
-        txt = "y"
-    else:
-        txt = input(
-            "This conversion script will unpickle a pickled file, which is inherently unsafe. If you do not trust this file, we invite you to use"
-            " https://huggingface.co/spaces/safetensors/convert or google colab or other hosted solution to avoid potential issues with this file."
-            " Continue [Y/n] ?"
-        )
-    if txt.lower() in {"", "y"}:
-        commit_info, errors = convert(api, model_id, revision=args.revision, force=args.force)
-        string = f"""
-        ### Success 🔥
-        Yay! This model was successfully converted and a PR was open using your token, here:
-        [{commit_info.pr_url}]({commit_info.pr_url})
-        """
-        if errors:
-            string += "\nErrors during conversion:\n"
-            string += "\n".join(
-                f"Error while converting {filename}: {e}, skipped conversion" for filename, e in errors
-            )
-        print(string)
-    else:
-        print(f"Answer was `{txt}` aborting.")
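With additional_args left empty, convert_to_core_ml() fills in the defaults added above, so the subprocess it launches is equivalent to the sketch below (the real code writes into a TemporaryDirectory; /tmp/coreml-out is a stand-in path):

    import subprocess

    # Command assembled by convert_to_core_ml() for model_version="stabilityai/sd-turbo"
    # when additional_args == "" (output path is a placeholder):
    command = [
        "python3", "-m", "python_coreml_stable_diffusion.torch2coreml",
        "--model-version", "stabilityai/sd-turbo",
        "-o", "/tmp/coreml-out",
        "--convert-unet", "--convert-text-encoder", "--convert-vae-decoder",
        "--attention-implementation", "SPLIT_EINSUM",
        "--quantize-nbits", "6",
    ]
    subprocess.run(command)

--quantize-nbits 6 palettizes the converted weights to 6 bits, which is where the size reduction comes from, and SPLIT_EINSUM selects the attention implementation tuned for the Apple Neural Engine.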