| | <!DOCTYPE html> |
| | <html> |
| | <head> |
| | <meta charset="UTF-8" /> |
| | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| | <style> |
| | @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap"); |
| | html, |
| | body { |
| | font-family: "Source Sans 3", sans-serif; |
| | } |
| | </style> |
| | <title>Candle Blip Image Captioning Demo</title> |
| | <script src="https://cdn.tailwindcss.com/3.4.3"></script> |
| | <script type="module" src="./code.js"></script> |
| | <script type="module"> |
// Base URLs of the two Hugging Face repositories the weights are served from.
const CANDLE_BLIP_BASE_URL =
  "https://huggingface.co/lmz/candle-blip/resolve/main/";
const SALESFORCE_BLIP_BASE_URL =
  "https://huggingface.co/Salesforce/blip-image-captioning-large/resolve/refs%2Fpr%2F18/";

/**
 * Builds one catalogue entry. All entries use the same config and tokenizer
 * file names, so only the fields that differ are passed in.
 *
 * @param {string} base_url - URL prefix the file names are appended to.
 * @param {string} model - Weights file name.
 * @param {boolean} quantized - Whether the weights file is a quantized build.
 * @param {string} size - Human-readable download size shown in the selector.
 */
const describeModel = (base_url, model, quantized, size) => ({
  base_url,
  model,
  config: "config.json",
  tokenizer: "tokenizer.json",
  quantized,
  size,
});

/** Selectable models, keyed by the id used as the <option> value. */
const MODELS = {
  blip_image_quantized_q4k: describeModel(
    CANDLE_BLIP_BASE_URL,
    "blip-image-captioning-large-q4k.gguf",
    true,
    "271 MB"
  ),
  blip_image_quantized_q80: describeModel(
    CANDLE_BLIP_BASE_URL,
    "blip-image-captioning-large-q80.gguf",
    true,
    "505 MB"
  ),
  blip_image_large: describeModel(
    SALESFORCE_BLIP_BASE_URL,
    "model.safetensors",
    false,
    "1.88 GB"
  ),
};
| | |
// Module worker loaded from ./blipWorker.js; getImageCaption() talks to it
// through postMessage / "message" events.
const blipWorker = new Worker("./blipWorker.js", { type: "module" });

// Page elements used by the handlers below, looked up once by id.
const outputStatusEl = document.getElementById("output-status");
const outputCaptionEl = document.getElementById("output-caption");
const modelSelectEl = document.getElementById("model");
const clearBtn = document.getElementById("clear-btn");
const fileUpload = document.getElementById("file-upload");
const dropArea = document.getElementById("drop-area");
const imagesExamples = document.getElementById("image-select");
const canvas = document.getElementById("canvas");
const ctxCanvas = canvas.getContext("2d");

// Mutable page state: whether a caption request is running, and the URL of
// the image currently shown on the canvas (null when the canvas is empty).
let isCaptioning = false;
let currentImageURL = null;
// The clear button resets the preview canvas and the output panel.
clearBtn.addEventListener("click", () => clearImageCanvas());

// Choosing another model re-captions the image that is currently loaded;
// with no image on the canvas there is nothing to do.
modelSelectEl.addEventListener("change", () => {
  if (!currentImageURL) {
    return;
  }
  runInference(currentImageURL);
});
| | |
| | |
// Caption an image picked through the hidden file input.
fileUpload.addEventListener("input", async (e) => {
  const input = e.target;
  const file = input.files[0];
  // Reset the control so picking the same file again (e.g. after "clear")
  // still fires an "input" event.
  input.value = "";
  // Same rule as the example thumbnails: ignore new images while a caption
  // request is still running.
  if (!file || isCaptioning) {
    return;
  }
  // The object URL is not revoked here because drawImageCanvas() stores it in
  // currentImageURL, which the model selector reuses to re-run inference.
  const href = URL.createObjectURL(file);
  clearImageCanvas();
  try {
    await drawImageCanvas(href);
  } catch (err) {
    console.error(err);
    outputStatusEl.innerText = err.message;
    return;
  }
  runInference(href);
});
| | |
// Highlight the drop zone border while something is dragged over it and
// remove the highlight again when the drag leaves.
for (const [eventName, highlighted] of [
  ["dragenter", true],
  ["dragleave", false],
]) {
  dropArea.addEventListener(eventName, (e) => {
    e.preventDefault();
    dropArea.classList.toggle("border-blue-700", highlighted);
  });
}
// Cancelling "dragover" is what marks the element as a valid drop target.
dropArea.addEventListener("dragover", (e) => e.preventDefault());
// Caption an image dropped onto the drop zone, either as a file or as a URL
// dragged from another page.
dropArea.addEventListener("drop", async (e) => {
  e.preventDefault();
  dropArea.classList.remove("border-blue-700");
  // Same rule as the example thumbnails: ignore new images while a caption
  // request is still running.
  if (isCaptioning) {
    return;
  }

  // Read the transfer data before the first await.
  const file = e.dataTransfer.files[0];
  // A text/uri-list payload may contain several lines and "#" comment lines;
  // use the first real entry.
  const droppedURL = e.dataTransfer
    .getData("text/uri-list")
    .split(/\r?\n/)
    .map((line) => line.trim())
    .find((line) => line !== "" && !line.startsWith("#"));

  // Dropped files win over dropped links, as before.
  const href = file ? URL.createObjectURL(file) : droppedURL;
  if (!href) {
    return;
  }

  clearImageCanvas();
  try {
    await drawImageCanvas(href);
  } catch (err) {
    console.error(err);
    outputStatusEl.innerText = err.message;
    return;
  }
  runInference(href);
});
| | |
// Caption one of the example thumbnails. The listener sits on the container,
// so clicks that did not land on an <img> are ignored, as are clicks made
// while a caption request is running.
imagesExamples.addEventListener("click", async ({ target }) => {
  const clickedThumbnail = target.nodeName === "IMG";
  if (isCaptioning || !clickedThumbnail) {
    return;
  }
  const exampleURL = target.src;
  clearImageCanvas();
  await drawImageCanvas(exampleURL);
  runInference(exampleURL);
});
/**
 * Resets the preview to its empty state: wipes the canvas, disables the clear
 * button, restores the "Please select an image" status line, hides the
 * caption and forgets the current image URL.
 *
 * The in-flight flag `isCaptioning` is deliberately left alone. This function
 * cannot cancel a request the worker is already processing, and runInference()
 * resets the flag itself when the request settles; clearing it here would let
 * a second request start while the first is still running.
 */
function clearImageCanvas() {
  ctxCanvas.clearRect(0, 0, canvas.width, canvas.height);
  clearBtn.disabled = true;
  // Let the wrapper fall back to its CSS-defined height.
  canvas.parentElement.style.height = "auto";
  outputStatusEl.hidden = false;
  outputCaptionEl.hidden = true;
  outputStatusEl.innerText = "Please select an image";
  currentImageURL = null;
}
| | |
/**
 * Loads an image and draws it on the preview canvas at its own pixel size.
 *
 * On success the canvas is resized to the image, the wrapper height is synced
 * to the rendered canvas, the clear button is enabled and `currentImageURL`
 * is updated. On failure none of that state is touched.
 *
 * @param {string} imgURL - URL (http(s), blob: or data:) of the image to show.
 * @returns {Promise<HTMLImageElement>} The loaded image element.
 * @throws {Error} If `imgURL` is empty or the image fails to load.
 */
async function drawImageCanvas(imgURL) {
  if (!imgURL) {
    throw new Error("No image URL provided");
  }
  return new Promise((resolve, reject) => {
    ctxCanvas.clearRect(0, 0, canvas.width, canvas.height);

    const img = new Image();
    // NOTE(review): presumably set so cross-origin images do not taint the
    // canvas; confirm the image hosts send CORS headers.
    img.crossOrigin = "anonymous";
    img.onload = () => {
      canvas.width = img.width;
      canvas.height = img.height;
      ctxCanvas.drawImage(img, 0, 0);
      canvas.parentElement.style.height = `${canvas.offsetHeight}px`;
      clearBtn.disabled = false;
      // Only remember URLs that actually loaded, so a later model change
      // never re-runs inference on a broken image.
      currentImageURL = imgURL;
      resolve(img);
    };
    // Without this the promise would stay pending forever on a bad URL.
    img.onerror = () => {
      reject(new Error(`Failed to load image: ${imgURL}`));
    };
    img.src = imgURL;
  });
}
| | |
// Fill the model selector with one <option> per MODELS entry, labelled
// "<id> (<download size>)", once the document has been parsed.
document.addEventListener("DOMContentLoaded", () => {
  const options = Object.entries(MODELS).map(([id, { size }]) => {
    const option = document.createElement("option");
    option.value = id;
    option.innerText = `${id} (${size})`;
    return option;
  });
  modelSelectEl.append(...options);
});
/**
 * Sends one captioning request to the worker and waits for its outcome.
 *
 * The worker is expected to answer with "message" events whose `data` is an
 * object: `{ error }` rejects the promise, `{ status: "complete", ... }`
 * resolves it with that object. Every message payload (including those two)
 * is also forwarded to `updateStatus` when one is given. An uncaught failure
 * inside the worker (its "error" event) rejects the promise as well, so the
 * caller never waits forever.
 *
 * @param {Worker} worker - Worker that performs the inference.
 * @param {string} weightsURL - URL of the model weights.
 * @param {string} tokenizerURL - URL of the tokenizer file.
 * @param {string} configURL - URL of the model config file.
 * @param {string} modelID - Identifier of the selected model.
 * @param {string} imageURL - URL of the image to caption.
 * @param {boolean} quantized - Whether the weights are quantized.
 * @param {?function(*): void} [updateStatus=null] - Called with each message payload.
 * @returns {Promise<Object>} The payload of the "complete" message.
 */
async function getImageCaption(
  worker,
  weightsURL,
  tokenizerURL,
  configURL,
  modelID,
  imageURL,
  quantized,
  updateStatus = null
) {
  return new Promise((resolve, reject) => {
    function detach() {
      worker.removeEventListener("message", messageHandler);
      worker.removeEventListener("error", errorHandler);
    }

    function messageHandler(event) {
      const data = event.data;
      // `in` throws on primitives, so only inspect object payloads.
      const isObject = data !== null && typeof data === "object";
      if (isObject && "error" in data) {
        detach();
        reject(new Error(data.error));
      } else if (isObject && data.status === "complete") {
        detach();
        resolve(data);
      }
      if (updateStatus) updateStatus(data);
    }

    function errorHandler(event) {
      detach();
      reject(
        event.error instanceof Error
          ? event.error
          : new Error(event.message || "Worker failed")
      );
    }

    // Subscribe before posting so no reply can be missed.
    worker.addEventListener("message", messageHandler);
    worker.addEventListener("error", errorHandler);
    worker.postMessage({
      weightsURL,
      tokenizerURL,
      configURL,
      modelID,
      imageURL,
      quantized,
    });
  });
}
/**
 * Shows a worker progress message in the status line.
 * Payloads whose `status` is anything other than "status" are ignored.
 *
 * @param {Object} data - Message payload received from the worker.
 */
function updateStatus(data) {
  const isProgressMessage = data.status === "status";
  if (!isProgressMessage) {
    return;
  }
  outputStatusEl.innerText = data.message;
}
/**
 * Captions `imageURL` with the model currently selected in the dropdown and
 * renders either the caption (plus elapsed time) or the error message.
 *
 * While a request is running, the clear button and model selector are
 * disabled and further calls are ignored. The controls and the `isCaptioning`
 * flag are always restored when the request settles, whatever the outcome.
 *
 * @param {string} imageURL - URL of the image to caption.
 * @returns {Promise<void>}
 */
async function runInference(imageURL) {
  // A request is already running: ignore silently rather than showing the
  // unrelated "select an image" prompt.
  if (isCaptioning) {
    return;
  }
  if (!imageURL) {
    alert("Please select an image first");
    return;
  }

  outputStatusEl.hidden = false;
  outputCaptionEl.hidden = true;
  clearBtn.disabled = true;
  modelSelectEl.disabled = true;
  isCaptioning = true;

  try {
    // Resolved inside the try block so an unknown id is reported like any
    // other failure instead of leaving the controls disabled.
    const selectedModel = modelSelectEl.value;
    const model = MODELS[selectedModel];
    if (!model) {
      throw new Error(`Unknown model: ${selectedModel}`);
    }
    const weightsURL = `${model.base_url}${model.model}`;
    const tokenizerURL = `${model.base_url}${model.tokenizer}`;
    const configURL = `${model.base_url}${model.config}`;

    const startTime = performance.now();
    const caption = await getImageCaption(
      blipWorker,
      weightsURL,
      tokenizerURL,
      configURL,
      selectedModel,
      imageURL,
      model.quantized,
      updateStatus
    );
    const totalTime = ((performance.now() - startTime) / 1000).toFixed(2);

    // Build the result from nodes so the generated caption is always shown
    // as text and never parsed as markup.
    const timingEl = document.createElement("span");
    timingEl.className = "text-xs";
    timingEl.textContent = `Inference time: ${totalTime} s`;
    outputCaptionEl.replaceChildren(
      document.createTextNode(caption.output),
      document.createElement("br"),
      timingEl
    );
    outputStatusEl.hidden = true;
    outputCaptionEl.hidden = false;
  } catch (err) {
    console.error(err);
    outputStatusEl.hidden = false;
    outputCaptionEl.hidden = true;
    outputStatusEl.innerText = err.message;
  } finally {
    clearBtn.disabled = false;
    modelSelectEl.disabled = false;
    isCaptioning = false;
  }
}
| | </script> |
| | </head> |
| | <body class="container max-w-4xl mx-auto p-4"> |
| | <main class="grid grid-cols-1 gap-5 relative"> |
| | <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span> |
| | <div> |
| | <h1 class="text-5xl font-bold">Candle BLIP Image Captioning</h1> |
| | <h2 class="text-2xl font-bold">Rust/WASM Demo</h2> |
| | <p class="max-w-lg"> |
| | <a |
| | href="https://huggingface.co/Salesforce/blip-image-captioning-large" |
| | target="_blank" |
| | class="underline hover:text-blue-500 hover:no-underline" |
| | >BLIP Image Captioning |
| | </a> |
| | running in the browser using |
| | <a |
| | href="https://github.com/huggingface/candle/" |
| | target="_blank" |
| | class="underline hover:text-blue-500 hover:no-underline" |
| | >Candle</a |
| | >, a minimalist ML framework for Rust. |
| | </p> |
| | <p class="text-xs max-w-lg py-2"> |
| | <b>Note:</b> |
Image captioning with the smallest model takes about 50 seconds; the time
will vary depending on your machine and the model size.
| | </p> |
| | </div> |
| |
|
| | <div> |
<label for="model" class="font-medium block">Model Options: </label>
| | <select |
| | id="model" |
| | class="border-2 border-gray-500 rounded-md font-light interactive disabled:cursor-not-allowed w-full max-w-max" |
| | ></select> |
| | </div> |
| | |
| | <div class="grid gap-4 sm:grid-cols-2 py-4"> |
| | <div class="relative max-w-lg"> |
| | <div |
| | class="absolute w-full bottom-full flex justify-between items-center" |
| | > |
| | <div class="flex gap-2 w-full"> |
| | <button |
| | id="clear-btn" |
| | disabled |
| | title="Clear Image" |
| | class="ml-auto text-xs bg-white rounded-md disabled:opacity-50 flex gap-1 items-center" |
| | > |
| | <svg |
| | class="" |
| | xmlns="http://www.w3.org/2000/svg" |
| | viewBox="0 0 13 12" |
| | height="1em" |
| | > |
| | <path |
| | d="M1.6.7 12 11.1M12 .7 1.6 11.1" |
| | stroke="#2E3036" |
| | stroke-width="2" |
| | /> |
| | </svg> |
| | </button> |
| | </div> |
| | </div> |
| | <div |
| | id="drop-area" |
| | class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative aspect-video w-full overflow-hidden" |
| | > |
| | <div |
| | class="flex flex-col items-center justify-center space-y-1 text-center" |
| | > |
| | <svg |
| | width="25" |
| | height="25" |
| | viewBox="0 0 25 25" |
| | fill="none" |
| | xmlns="http://www.w3.org/2000/svg" |
| | > |
| | <path |
| | d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z" |
| | fill="#000" |
| | /> |
| | </svg> |
| | <div class="flex text-sm text-gray-600"> |
| | <label |
| | for="file-upload" |
| | class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700" |
| | > |
<span>Drag and drop your image here</span>
| | <span class="block text-xs">or</span> |
| | <span class="block text-xs">Click to upload</span> |
| | </label> |
| | </div> |
| | <input |
| | id="file-upload" |
| | name="file-upload" |
| | type="file" |
| | class="sr-only" |
| | /> |
| | </div> |
| | <canvas |
| | id="canvas" |
| | class="absolute pointer-events-none w-full" |
| | ></canvas> |
| | </div> |
| | </div> |
| | <div class=""> |
| | <div |
| | class="h-full bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2" |
| | > |
| | <p |
| | id="output-caption" |
| | class="m-auto text-xl text-center p-2" |
| | hidden |
| | ></p> |
| | <span id="output-status" class="m-auto font-light"> |
| | Please select an image |
| | </span> |
| | </div> |
| | </div> |
| | </div> |
| |
|
| | <div> |
| | <div |
| | class="flex gap-3 items-center overflow-x-scroll" |
| | id="image-select" |
| | > |
| | <h3 class="font-medium">Examples:</h3> |
| |
|
| | <img |
| | src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/sf.jpg" |
| | class="cursor-pointer w-24 h-24 object-cover" |
| | /> |
| | <img |
| | src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/bike.jpeg" |
| | class="cursor-pointer w-24 h-24 object-cover" |
| | /> |
| | <img |
| | src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/000000000077.jpg" |
| | class="cursor-pointer w-24 h-24 object-cover" |
| | /> |
| | </div> |
| | </div> |
| | </main> |
| | </body> |
| | </html> |
| |
|