"""SHARP Gradio demo (PLY export + 3D viewer). This Space: - Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image. - Exports a canonical `.ply` file for download. - Serves unique PLY and settings files per generation via the SuperSplat Viewer. Uses Gradio 6's static file serving (no FastAPI/uvicorn needed). """ from __future__ import annotations import json import math import time import uuid from pathlib import Path from typing import Final import gradio as gr from model_utils import predict_to_ply_gpu # ----------------------------------------------------------------------------- # Paths & constants # ----------------------------------------------------------------------------- APP_DIR: Final[Path] = Path(__file__).resolve().parent OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs" VIEWER_DIR: Final[Path] = APP_DIR / "viewer" DEFAULT_SETTINGS_JSON: Final[Path] = VIEWER_DIR / "settings.default.json" DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32 DEFAULT_FOCAL_LENGTH_MM: Final[float] = 35.0 SENSOR_HEIGHT_MM: Final[float] = 24.0 # Full-frame 35mm equivalent SENSOR_WIDTH_MM: Final[float] = 36.0 # Full-frame 35mm width # Register static paths for Gradio 6 file serving gr.set_static_paths(paths=[str(VIEWER_DIR), str(OUTPUTS_DIR)]) THEME: Final = gr.themes.Origin() CSS: Final[str] = """ /* Keep layout stable when scrollbars appear/disappear */ html { scrollbar-gutter: stable; } /* Use normal document flow */ html, body { height: auto; } body { overflow: auto; } /* Full-width layout */ .gradio-container { max-width: none; width: 100%; margin: 0; padding: 0.5rem 1rem 1rem; box-sizing: border-box; } /* Header styling */ #app-header { margin-bottom: 0.5rem; } #app-header h2 { margin: 0 0 0.25rem 0; font-size: 1.5rem; } #app-header p { margin: 0; opacity: 0.85; } /* Main layout: controls left, viewer right (larger) */ #main-row { gap: 1rem; align-items: stretch; } /* Left panel: controls */ #controls-panel { display: flex; flex-direction: column; gap: 0.75rem; } #controls-panel .input-image-container { flex: 1; min-height: 200px; } #input-image { width: 100%; } #input-image img { width: 100%; height: auto; max-height: 280px; object-fit: contain; } /* Options row */ #options-row { gap: 0.5rem; } #options-row > div { flex: 1; } /* Action buttons */ #actions-row { gap: 0.5rem; } #actions-row button { flex: 1; min-height: 42px; } /* Downloads row */ #downloads-row { gap: 0.5rem; align-items: center; } #downloads-row > div { flex: 1; } /* Right panel: 3D viewer (dominant) */ #viewer-panel { display: flex; flex-direction: column; min-height: 500px; } #viewer-container { flex: 1; display: flex; flex-direction: column; min-height: 0; } /* Viewer iframe/placeholder */ #viewer-html { flex: 1; min-height: 500px; } #viewer-html iframe { width: 100%; height: 100%; min-height: 500px; border: 0; border-radius: 12px; overflow: hidden; background: #000; } /* Placeholder styling */ .viewer-placeholder { width: 100%; height: 100%; min-height: 500px; display: flex; align-items: center; justify-content: center; border: 2px dashed var(--border-color-primary, rgba(127, 127, 127, 0.35)); border-radius: 12px; background: var(--block-background-fill, rgba(127, 127, 127, 0.05)); color: var(--body-text-color, rgba(255, 255, 255, 0.92)); transition: all 0.3s ease; } .viewer-placeholder-inner { max-width: 400px; padding: 32px; text-align: center; } .viewer-placeholder-icon { font-size: 48px; margin-bottom: 16px; opacity: 0.6; } .viewer-placeholder-title { font-size: 18px; font-weight: 600; margin-bottom: 8px; } .viewer-placeholder-desc { font-size: 14px; line-height: 1.5; opacity: 0.75; } /* Loading state */ .viewer-loading { border-color: var(--primary-500; background: linear-gradient( 135deg, rgba(255, 102, 0, 0.05) 0%, rgba(255, 102, 0, 0.1) 100% ); } .viewer-loading .viewer-placeholder-icon { animation: pulse 1.5s ease-in-out infinite; } @keyframes pulse { 0%, 100% { opacity: 0.4; transform: scale(1); } 50% { opacity: 0.8; transform: scale(1.05); } } /* Status text */ #status-text { font-size: 13px; opacity: 0.85; margin-top: 0.5rem; } /* Responsive: stack on small screens */ @media (max-width: 900px) { #main-row { flex-direction: column; } #controls-panel, #viewer-panel { min-width: 100% !important; } #viewer-html, #viewer-html iframe, .viewer-placeholder { min-height: 400px; } #input-image img { max-height: 200px; } } """ def _ensure_dir(path: Path) -> Path: path.mkdir(parents=True, exist_ok=True) return path _ensure_dir(OUTPUTS_DIR) _ensure_dir(VIEWER_DIR) # ----------------------------------------------------------------------------- # FOV / Focal Length utilities # ----------------------------------------------------------------------------- def focal_length_to_fov(focal_length_mm: float, sensor_height_mm: float = SENSOR_HEIGHT_MM, sensor_width_mm: float = SENSOR_WIDTH_MM) -> float: """Convert focal length (mm) to diagonal field of view (degrees). Uses the formula: FOV = 2 * atan(diagonal / (2 * focal_length)) where diagonal = sqrt(width^2 + height^2) for full-frame 35mm (36x24mm) """ if focal_length_mm <= 0: focal_length_mm = DEFAULT_FOCAL_LENGTH_MM diagonal_mm = math.sqrt(sensor_width_mm**2 + sensor_height_mm**2) fov_rad = 2 * math.atan(diagonal_mm / (2 * focal_length_mm)) return math.degrees(fov_rad) def create_settings_file(focal_length_mm: float, output_stem: str) -> Path: """Create a unique settings.json for this generation.""" fov = focal_length_to_fov(focal_length_mm) # Load default settings as base settings = { "camera": { "fov": fov, "position": [0, 0, 0], "target": [0, 0, 0], "startAnim": "none", "animTrack": "" }, "background": {"color": [0, 0, 0, 0]}, "animTracks": [] } if DEFAULT_SETTINGS_JSON.exists(): try: existing = json.loads(DEFAULT_SETTINGS_JSON.read_text(encoding="utf-8")) # Merge, preserving existing values but updating FOV if "background" in existing: settings["background"] = existing["background"] if "camera" in existing: settings["camera"] = {**settings["camera"], **existing["camera"]} settings["camera"]["fov"] = fov # Always update FOV if "animTracks" in existing: settings["animTracks"] = existing["animTracks"] except Exception: pass settings_path = OUTPUTS_DIR / f"{output_stem}.settings.json" settings_path.write_text(json.dumps(settings, indent=2), encoding="utf-8") return settings_path # ----------------------------------------------------------------------------- # Validation & file operations # ----------------------------------------------------------------------------- def _validate_image(image_path: str | None) -> None: if not image_path: raise gr.Error("Please upload an image first.") def _generate_output_stem() -> str: """Generate unique output file stem.""" ts = int(time.time() * 1000) uid = uuid.uuid4().hex[:8] return f"scene_{ts}_{uid}" # ----------------------------------------------------------------------------- # HTML generators # ----------------------------------------------------------------------------- def viewer_url_for_output(ply_filename: str, settings_filename: str) -> str: """URL for the viewer with specific output files.""" # Use absolute paths with /gradio_api/file= prefix for content and settings content_path = f"/gradio_api/file=outputs/{ply_filename}" settings_path = f"/gradio_api/file=outputs/{settings_filename}" return f"/gradio_api/file=viewer/index.html?content={content_path}&settings={settings_path}&noanim" def viewer_placeholder_html() -> str: return """
🎨
3D Viewer
Upload an image and click Generate to create a 3D Gaussian scene. The interactive viewer will appear here.
""" def viewer_loading_html() -> str: """Loading placeholder with timer element.""" return """
âš¡
Generating 3D Scene...
Running SHARP model inference. This may take a moment.
0s
""" def viewer_iframe_html(ply_filename: str, settings_filename: str) -> str: src = viewer_url_for_output(ply_filename, settings_filename) return f""" """ # ----------------------------------------------------------------------------- # Main inference function # ----------------------------------------------------------------------------- def run_sharp( image_path: str | None, focal_length_mm: float, ) -> tuple[object, object, str, object, object, object, str]: """Run SHARP inference. Returns: (ply_download, viewer_html, status, generate_btn, clear_btn, open_viewer_btn, current_viewer_url) """ _validate_image(image_path) try: # Generate unique output stem output_stem = _generate_output_stem() # Create settings file with FOV settings_path = create_settings_file(focal_length_mm, output_stem) # Run inference ply_path = predict_to_ply_gpu(image_path) # Rename PLY to unique name in outputs unique_ply_path = OUTPUTS_DIR / f"{output_stem}.ply" ply_path.rename(unique_ply_path) fov = focal_length_to_fov(focal_length_mm) status = f"✓ Generated **{unique_ply_path.name}** | FOV: {fov:.1f}°" viewer_url = viewer_url_for_output(unique_ply_path.name, settings_path.name) return ( gr.update(value=str(unique_ply_path), visible=True, interactive=True), viewer_iframe_html(unique_ply_path.name, settings_path.name), status, gr.update(interactive=True, value="Generate"), gr.update(interactive=True), gr.update(visible=True, interactive=True), viewer_url, ) except gr.Error: raise except Exception as e: raise gr.Error(f"Generation failed: {type(e).__name__}: {e}") from e def start_generation() -> tuple[str, object, object]: """Start generation: show loading state. Returns: (viewer_html, generate_btn, clear_btn) """ return ( viewer_loading_html(), gr.update(interactive=False, value="Generating..."), gr.update(interactive=False), ) def clear_all() -> tuple: """Clear all inputs and outputs. Returns: (image, ply_download, viewer_html, status, generate_btn, clear_btn, open_viewer_btn, current_viewer_url) """ return ( None, gr.update(value=None, visible=False), viewer_placeholder_html(), "", gr.update(interactive=True, value="Generate"), gr.update(interactive=False), gr.update(visible=False), "", ) def on_image_change(image_path: str | None) -> tuple[object, object]: """Handle image upload/removal. Returns: (generate_btn, clear_btn) """ has_image = bool(image_path) return ( gr.update(interactive=has_image, value="Generate"), gr.update(interactive=has_image), ) # ----------------------------------------------------------------------------- # UI # ----------------------------------------------------------------------------- # Global JS for timer control and viewer URL (injected via head parameter) HEAD_JS: Final[str] = """ """ def build_demo() -> gr.Blocks: with gr.Blocks( title="SHARP • Single-Image 3D Gaussian Prediction", elem_id="sharp-root", ) as demo: # Hidden textbox to store viewer URL (State doesn't work well with js param) current_viewer_url = gr.Textbox(value="", visible=False, elem_id="viewer-url-store") # Header with gr.Column(elem_id="app-header"): gr.Markdown("## SHARP") gr.Markdown("Single-image **3D Gaussian scene** prediction") # Main layout: controls (left, narrow) + viewer (right, wide) with gr.Row(elem_id="main-row", equal_height=True): # Left column: Controls with gr.Column(scale=3, min_width=280, elem_id="controls-panel"): # Image upload image_in = gr.Image( label="Input Image", type="filepath", sources=["upload"], elem_id="input-image", show_label=True, ) # Options with gr.Row(elem_id="options-row"): focal_length = gr.Slider( label="Focal Length (mm)", minimum=12, maximum=200, step=1, value=DEFAULT_FOCAL_LENGTH_MM, info="Affects viewer FOV", ) # Action buttons with gr.Row(elem_id="actions-row"): generate_btn = gr.Button( "Generate", variant="primary", interactive=False, elem_id="generate-btn", ) clear_btn = gr.Button( "Clear", variant="secondary", interactive=False, elem_id="clear-btn", ) # Downloads with gr.Row(elem_id="downloads-row"): ply_download = gr.DownloadButton( label="Download PLY", value=None, visible=False, elem_id="ply-download", ) open_viewer_btn = gr.Button( "Open Viewer in New Tab ↗", size="sm", visible=False, elem_id="open-viewer-btn", ) # Status status_md = gr.Markdown("", elem_id="status-text") # Right column: 3D Viewer (dominant) with gr.Column(scale=7, min_width=400, elem_id="viewer-panel"): viewer_html = gr.HTML( value=viewer_placeholder_html(), elem_id="viewer-html", label="3D Viewer", ) # About section (collapsible) with gr.Accordion("About", open=False): gr.Markdown(""" ### SHARP Model **Sharp Monocular View Synthesis in Less Than a Second** (Apple, 2025) SHARP predicts a 3D Gaussian splatting scene from a single image, enabling novel view synthesis. ```bibtex @inproceedings{Sharp2025:arxiv, title = {Sharp Monocular View Synthesis in Less Than a Second}, author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Amaël Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun}, journal = {arXiv preprint arXiv:2512.10685}, year = {2025}, } ``` ### 3D Viewer Powered by [SuperSplat Viewer](https://github.com/playcanvas/supersplat-viewer) by PlayCanvas. """.strip()) # --- Event handlers --- # Image change: enable/disable buttons image_in.change( fn=on_image_change, inputs=[image_in], outputs=[generate_btn, clear_btn], queue=False, show_progress="hidden", ) # Generate: start loading, run inference generate_btn.click( fn=start_generation, outputs=[viewer_html, generate_btn, clear_btn], queue=False, show_progress="hidden", js="() => { window.sharpTimer && window.sharpTimer.start(); }", ).then( fn=run_sharp, inputs=[image_in, focal_length], outputs=[ply_download, viewer_html, status_md, generate_btn, clear_btn, open_viewer_btn, current_viewer_url], show_progress="hidden", ).then( fn=lambda: None, js="() => { window.sharpTimer && window.sharpTimer.stop(); }", ) # Clear clear_btn.click( fn=clear_all, outputs=[image_in, ply_download, viewer_html, status_md, generate_btn, clear_btn, open_viewer_btn, current_viewer_url], queue=False, show_progress="hidden", ) # Open viewer in new tab using global URL open_viewer_btn.click( fn=None, js="() => { window.openSharpViewer(); }", ) demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1) return demo demo = build_demo() if __name__ == "__main__": demo.launch(theme=THEME, css=CSS, head=HEAD_JS)