#!/usr/bin/env python3
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
# Backend API server for Depth Anything 3 remote inference

import os
import sys
import asyncio
import base64
import io
import json
import uuid
from typing import Dict, Any, Optional
from datetime import datetime
import glob
import shutil
import zipfile

import numpy as np
import torch
from fastapi import FastAPI, WebSocket, HTTPException, Query
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn

sys.path.append("depth-anything-3/")
from depth_anything_3.api import DepthAnything3  # noqa: E402
from depth_anything_3.utils.export.glb import export_to_glb  # noqa: E402
from depth_anything_3.utils.export.gs import export_to_gs_video  # noqa: E402

# Initialize FastAPI app
app = FastAPI(title="Depth Anything 3 Inference API", version="1.0.0")

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global model instance
model: Optional[DepthAnything3] = None
device: Optional[str] = None

# Job storage: {job_id: {"status": "queued/processing/completed/failed", "result": {...}, ...}}
# NOTE(review): entries (including base64 artifacts) are never evicted, so this
# dict grows for the lifetime of the process.
jobs: Dict[str, Dict[str, Any]] = {}

# WebSocket connections: {client_id: websocket}
websocket_connections: Dict[str, WebSocket] = {}

# Strong references to in-flight background inference tasks. The event loop
# only keeps weak references to tasks, so a task whose handle is dropped may
# be garbage-collected before it finishes.
_background_tasks: set = set()


# -------------------------------------------------------------------------
# Request/Response Models
# -------------------------------------------------------------------------
class ImageData(BaseModel):
    """One uploaded image: a client-chosen filename plus its encoded content."""

    filename: str
    data: str  # base64 encoded image


class Options(BaseModel):
    """Per-request inference and export options; every field has a default."""

    process_res_method: Optional[str] = "upper_bound_resize"
    selected_first_frame: Optional[str] = ""
    infer_gs: Optional[bool] = False
    # Optional export tuning (defaults if not provided)
    conf_thresh_percentile: Optional[float] = 40.0
    num_max_points: Optional[int] = 1_000_000
    show_cameras: Optional[bool] = True
    gs_trj_mode: Optional[str] = "extend"  # "extend" | "smooth"
    gs_video_quality: Optional[str] = "low"  # "low" | "high"


class InferenceRequest(BaseModel):
    """Body of POST /inference."""

    images: list[ImageData]
    client_id: str
    options: Optional[Options] = None


class InferenceResponse(BaseModel):
    """Response of POST /inference."""

    job_id: str
    status: str = "queued"


# Single source of truth for option defaults (used when a request omits
# `options` entirely or sends an explicit null for a field).
_DEFAULT_OPTIONS = Options()


# -------------------------------------------------------------------------
# Model Loading
# -------------------------------------------------------------------------
def load_model():
    """Load Depth Anything 3 model on startup (GPU required).

    Populates the module-level ``model`` and ``device`` globals. The model
    location is read from the ``DA3_MODEL_DIR`` environment variable.

    Raises:
        RuntimeError: if CUDA is not available.
    """
    global model, device

    print("Initializing and loading Depth Anything 3 model...")

    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available. GPU is required for DA3 inference.")

    device = "cuda"
    model_dir = os.getenv("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")

    # Load from HF Hub or local path
    model = DepthAnything3.from_pretrained(model_dir)  # type: ignore
    model = model.to(device)
    model.eval()

    print(f"Model loaded successfully on {device} from {model_dir}")


# -------------------------------------------------------------------------
# Helpers
# -------------------------------------------------------------------------
def _serialize_bytes(b: bytes) -> str:
    """Serialize raw bytes to base64 string"""
    return base64.b64encode(b).decode("utf-8")


def _serialize_file(path: str) -> str:
    """Serialize a file at 'path' to base64 string"""
    with open(path, "rb") as f:
        return _serialize_bytes(f.read())


def _zip_dir_to_bytes(dir_path: str) -> bytes:
    """Zip a directory and return zip bytes.

    Archive member names are relative to ``dir_path``.
    """
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(dir_path):
            for fn in files:
                full = os.path.join(root, fn)
                arcname = os.path.relpath(full, start=dir_path)
                zf.write(full, arcname)
    return buffer.getvalue()


def _actual_process_method(name: str) -> str:
    """Map frontend option to actual processing method used by DA3.

    Unknown or empty names fall back to ``"upper_bound_resize"``.
    """
    mapping = {
        "high_res": "lower_bound_resize",
        "low_res": "upper_bound_resize",
        "upper_bound_resize": "upper_bound_resize",
        "lower_bound_resize": "lower_bound_resize",
        "upper_bound_crop": "upper_bound_crop",
    }
    return mapping.get(name or "upper_bound_resize", "upper_bound_resize")


def _option(options: Optional[Options], name: str) -> Any:
    """Return ``options.<name>``, or the ``Options`` default if unset/None.

    Every ``Options`` field is ``Optional``, so a client may send an explicit
    null; treating that like "not provided" avoids ``int(None)``-style errors.
    """
    value = getattr(options, name) if options is not None else None
    return getattr(_DEFAULT_OPTIONS, name) if value is None else value


async def _notify(client_id: Optional[str], message: Dict[str, Any]) -> None:
    """Best-effort push of ``message`` to the client's WebSocket, if connected.

    Send failures (e.g. the client disconnected) are logged and swallowed so
    that a broken progress channel never changes the outcome of a job.
    """
    if not client_id:
        return
    ws = websocket_connections.get(client_id)
    if ws is None:
        return
    try:
        await ws.send_json(message)
    except Exception as e:
        print(f"[backend] WebSocket notify failed for client {client_id}: {e}")


def _save_predictions_npz(target_dir: str, prediction: Any):
    """Save predictions data to predictions.npz for caching.

    Only attributes that are present and not None on ``prediction`` are
    written. Failures are logged and never raised.
    """
    try:
        output_file = os.path.join(target_dir, "predictions.npz")
        save_dict: Dict[str, Any] = {}

        if getattr(prediction, "processed_images", None) is not None:
            save_dict["images"] = prediction.processed_images
        if getattr(prediction, "depth", None) is not None:
            save_dict["depths"] = np.round(prediction.depth, 6)
        if getattr(prediction, "conf", None) is not None:
            save_dict["conf"] = np.round(prediction.conf, 2)
        if getattr(prediction, "extrinsics", None) is not None:
            save_dict["extrinsics"] = prediction.extrinsics
        if getattr(prediction, "intrinsics", None) is not None:
            save_dict["intrinsics"] = prediction.intrinsics

        np.savez_compressed(output_file, **save_dict)
        print(f"[backend] Saved predictions cache to: {output_file}")
    except Exception as e:
        print(f"[backend] Warning: Failed to save predictions cache: {e}")


# -------------------------------------------------------------------------
# Core Inference Function
# -------------------------------------------------------------------------
async def run_inference(
    job_id: str,
    target_dir: str,
    client_id: Optional[str] = None,
    options: Optional[Options] = None,
):
    """Run DA3 model inference on images and export all artifacts server-side.

    Reads images from ``<target_dir>/images``, runs the model, exports the GLB
    (and optionally a GS video), then stores base64-encoded artifacts in
    ``jobs[job_id]["result"]``. Progress is pushed over the client's WebSocket
    on a best-effort basis. ``target_dir`` is removed when the job ends,
    whether it succeeded or failed.

    Args:
        job_id: Key of an existing entry in ``jobs``.
        target_dir: Per-job working directory containing an ``images`` subdir.
        client_id: WebSocket client to notify, if any.
        options: Request options; defaults are used for anything missing.
    """
    try:
        # Update job status
        jobs[job_id]["status"] = "processing"

        # Send WebSocket update (start)
        await _notify(
            client_id, {"type": "executing", "data": {"job_id": job_id, "node": "start"}}
        )

        if model is None:
            raise RuntimeError("Model is not loaded")

        # Load and preprocess images
        image_names = sorted(glob.glob(os.path.join(target_dir, "images", "*")))
        print(f"Found {len(image_names)} images for job {job_id}")

        if not image_names:
            raise ValueError("No images found in target directory")

        # Reorder for selected first frame
        selected_first = _option(options, "selected_first_frame")
        if selected_first:
            sel_path = next(
                (p for p in image_names if os.path.basename(p) == selected_first), None
            )
            if sel_path:
                image_names = [sel_path] + [p for p in image_names if p != sel_path]
            print(f"Selected first frame: {selected_first} -> {sel_path}")

        # Send progress updates
        await _notify(
            client_id, {"type": "executing", "data": {"job_id": job_id, "node": "preprocess"}}
        )

        # Run inference (do not export during inference; export explicitly below)
        print(f"Running inference for job {job_id}...")
        actual_method = _actual_process_method(_option(options, "process_res_method"))
        infer_gs = bool(_option(options, "infer_gs"))

        # NOTE(review): this is a synchronous call inside a coroutine, so the
        # event loop (other requests, WebSocket heartbeats) is blocked until it
        # returns. Offloading to a worker thread would need a decision on how
        # to serialize concurrent GPU jobs — left as-is deliberately.
        with torch.no_grad():
            prediction = model.inference(
                image=image_names,
                process_res_method=actual_method,
                export_dir=None,  # export manually below
                export_format="mini_npz",
                infer_gs=infer_gs,
            )

        await _notify(
            client_id, {"type": "executing", "data": {"job_id": job_id, "node": "postprocess"}}
        )

        # Export GLB and (optional) GS video on backend. Export failures are
        # logged but do not fail the job; the artifact is simply omitted.
        try:
            export_to_glb(
                prediction,
                export_dir=target_dir,
                num_max_points=int(_option(options, "num_max_points")),
                conf_thresh_percentile=float(_option(options, "conf_thresh_percentile")),
                show_cameras=bool(_option(options, "show_cameras")),
            )
            print(f"[backend] Exported GLB + depth_vis to {target_dir}")
        except Exception as e:
            print(f"[backend] GLB export failed: {e}")

        if infer_gs:
            try:
                mode_mapping = {"extend": "extend", "smooth": "interpolate_smooth"}
                export_to_gs_video(
                    prediction,
                    export_dir=target_dir,
                    chunk_size=4,
                    trj_mode=mode_mapping.get(_option(options, "gs_trj_mode") or "extend", "extend"),
                    enable_tqdm=False,
                    vis_depth="hcat",
                    video_quality=_option(options, "gs_video_quality") or "low",
                )
                print(f"[backend] Exported GS video to {target_dir}")
            except Exception as e:
                print(f"[backend] GS video export failed: {e}")

        # Save predictions.npz on backend
        _save_predictions_npz(target_dir, prediction)

        # Package artifacts
        artifacts: Dict[str, Any] = {}

        glb_path = os.path.join(target_dir, "scene.glb")
        if os.path.exists(glb_path):
            artifacts["glb"] = _serialize_file(glb_path)

        depth_vis_dir = os.path.join(target_dir, "depth_vis")
        if os.path.isdir(depth_vis_dir):
            try:
                artifacts["depth_vis_zip"] = _serialize_bytes(_zip_dir_to_bytes(depth_vis_dir))
            except Exception as e:
                print(f"[backend] depth_vis zip failed: {e}")

        npz_path = os.path.join(target_dir, "predictions.npz")
        if os.path.exists(npz_path):
            artifacts["predictions_npz"] = _serialize_file(npz_path)

        # Optional GS video: search for mp4 under target_dir
        # Prefer gs_video subdir; fallback to recursive search under target_dir.
        # Sorted so the chosen file is deterministic (glob order is arbitrary).
        mp4_candidates = sorted(glob.glob(os.path.join(target_dir, "gs_video", "*.mp4")))
        if not mp4_candidates:
            mp4_candidates = sorted(
                glob.glob(os.path.join(target_dir, "**", "*.mp4"), recursive=True)
            )
        if mp4_candidates:
            artifacts["gs_video"] = _serialize_file(mp4_candidates[0])

        # Store result (result first, so a "completed" job always has one)
        jobs[job_id]["result"] = {"artifacts": artifacts}
        jobs[job_id]["status"] = "completed"

        # Send completion via WebSocket
        await _notify(
            client_id,
            {
                "type": "executing",
                "data": {
                    "job_id": job_id,
                    "node": None,  # None indicates completion
                },
            },
        )

        print(f"Job {job_id} completed successfully")

    except Exception as e:
        print(f"Error in job {job_id}: {str(e)}")
        jobs[job_id]["status"] = "failed"
        jobs[job_id]["error"] = str(e)

        await _notify(
            client_id, {"type": "error", "data": {"job_id": job_id, "error": str(e)}}
        )

    finally:
        # Clean up on every exit path so failed jobs do not leak the temp
        # directory or cached GPU memory.
        try:
            torch.cuda.empty_cache()
        except Exception as e:
            print(f"[backend] Warning: torch.cuda.empty_cache() failed: {e}")
        shutil.rmtree(target_dir, ignore_errors=True)


# -------------------------------------------------------------------------
# API Endpoints
# -------------------------------------------------------------------------
@app.on_event("startup")
async def startup_event():
    """Load model on startup"""
    load_model()


@app.get("/")
async def root():
    """Health check endpoint"""
    return {"status": "ok", "service": "Depth Anything 3 Inference API"}


@app.post("/inference")
async def create_inference(request: InferenceRequest, token: str = Query(...)):
    """
    Submit an inference job

    Args:
        request: InferenceRequest containing images, client_id, options
        token: Authentication token (currently not validated, for compatibility)

    Returns:
        InferenceResponse with job_id

    Raises:
        HTTPException: 400 if an image cannot be decoded or saved, or if a
            filename does not reduce to a plain file name.
    """
    # Generate unique job ID
    job_id = str(uuid.uuid4())

    # Create temporary directory for images
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    target_dir = f"/tmp/da3_job_{job_id}_{timestamp}"
    target_dir_images = os.path.join(target_dir, "images")
    os.makedirs(target_dir_images, exist_ok=True)

    # Decode and save images
    try:
        for img_data in request.images:
            # The filename is client-controlled: keep only its final path
            # component so it cannot escape target_dir_images (e.g. "../../x"
            # or an absolute path). Backslashes are normalized first so
            # Windows-style paths are reduced as well.
            safe_name = os.path.basename(img_data.filename.replace("\\", "/"))
            if not safe_name or safe_name in (".", ".."):
                raise ValueError(f"Invalid image filename: {img_data.filename!r}")

            img_bytes = base64.b64decode(img_data.data)
            img_path = os.path.join(target_dir_images, safe_name)
            with open(img_path, "wb") as f:
                f.write(img_bytes)

        # Initialize job
        jobs[job_id] = {
            "status": "queued",
            "result": None,
            "created_at": datetime.now().isoformat(),
        }

        # Start inference in background; hold a reference until it finishes so
        # the task cannot be garbage-collected mid-execution.
        task = asyncio.create_task(
            run_inference(job_id, target_dir, request.client_id, request.options)
        )
        _background_tasks.add(task)
        task.add_done_callback(_background_tasks.discard)

        return InferenceResponse(job_id=job_id, status="queued")

    except Exception as e:
        shutil.rmtree(target_dir, ignore_errors=True)
        raise HTTPException(
            status_code=400, detail=f"Failed to process images: {str(e)}"
        ) from e


@app.get("/result/{job_id}")
async def get_result(job_id: str, token: str = Query(...)):
    """
    Get inference result for a job

    Args:
        job_id: Job ID
        token: Authentication token (currently not validated, for compatibility)

    Returns:
        ``{job_id: {"status": ...}}`` while the job is pending, and
        ``{job_id: {"status": "completed", "artifacts": {...}}}`` once done.

    Raises:
        HTTPException: 404 if the job is unknown, 500 if the job failed.
    """
    if job_id not in jobs:
        raise HTTPException(status_code=404, detail="Job not found")

    job = jobs[job_id]

    if job["status"] == "failed":
        raise HTTPException(status_code=500, detail=job.get("error", "Job failed"))

    if job["status"] != "completed":
        return {job_id: {"status": job["status"]}}

    return {job_id: {"status": "completed", "artifacts": job["result"].get("artifacts", {})}}


@app.websocket("/ws")
async def websocket_endpoint(
    websocket: WebSocket, clientId: str = Query(...), token: str = Query(...)
):
    """
    WebSocket endpoint for real-time progress updates

    Args:
        websocket: WebSocket connection
        clientId: Client ID
        token: Authentication token (currently not validated, for compatibility)
    """
    await websocket.accept()
    websocket_connections[clientId] = websocket

    try:
        while True:
            # Keep connection alive
            data = await websocket.receive_text()
            # Echo back for heartbeat
            await websocket.send_text(data)
    except Exception as e:
        print(f"WebSocket error for client {clientId}: {str(e)}")
    finally:
        # Only unregister our own socket: if the same client has already
        # reconnected, the registry now holds the newer connection and must
        # not be evicted by this stale handler.
        if websocket_connections.get(clientId) is websocket:
            del websocket_connections[clientId]


@app.get("/history/{job_id}")
async def get_history(job_id: str, token: str = Query(...)):
    """
    Get job history (alias for /result for compatibility)

    Args:
        job_id: Job ID
        token: Authentication token

    Returns:
        Job history
    """
    return await get_result(job_id, token)


# -------------------------------------------------------------------------
# Main
# -------------------------------------------------------------------------
if __name__ == "__main__":
    # Run server (default port 7860)
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")