notaneimu's picture
feat: add sensor width constant and update FOV calculation; create .gitignore
aef63b7
"""SHARP Gradio demo (PLY export + 3D viewer).
This Space:
- Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image.
- Exports a canonical `.ply` file for download.
- Serves unique PLY and settings files per generation via the SuperSplat Viewer.
Uses Gradio 6's static file serving (no FastAPI/uvicorn needed).
"""
from __future__ import annotations
import json
import math
import time
import uuid
from pathlib import Path
from typing import Final
import gradio as gr
from model_utils import predict_to_ply_gpu
# -----------------------------------------------------------------------------
# Paths & constants
# -----------------------------------------------------------------------------
# Directory containing this file; every other path is derived from it.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
# Per-generation artefacts (PLY files and settings JSON) are written here.
OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
# Static viewer assets; the viewer URL points at `viewer/index.html`.
VIEWER_DIR: Final[Path] = APP_DIR / "viewer"
# Optional base settings that are merged into each per-generation settings file.
DEFAULT_SETTINGS_JSON: Final[Path] = VIEWER_DIR / "settings.default.json"
# Passed to `demo.queue(max_size=...)`.
DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32
# Slider default, and the fallback used for non-positive focal lengths.
DEFAULT_FOCAL_LENGTH_MM: Final[float] = 35.0
SENSOR_HEIGHT_MM: Final[float] = 24.0  # Full-frame (36x24 mm) sensor height
SENSOR_WIDTH_MM: Final[float] = 36.0  # Full-frame (36x24 mm) sensor width
# Register static paths for Gradio 6 file serving
gr.set_static_paths(paths=[str(VIEWER_DIR), str(OUTPUTS_DIR)])
# Theme object handed to `demo.launch(theme=...)`.
THEME: Final = gr.themes.Origin()
# Page stylesheet, passed to `demo.launch(css=...)`. Selectors target the
# `elem_id`s assigned in `build_demo` and the classes used by the viewer
# placeholder/loading markup.
CSS: Final[str] = """
/* Keep layout stable when scrollbars appear/disappear */
html { scrollbar-gutter: stable; }
/* Use normal document flow */
html, body { height: auto; }
body { overflow: auto; }
/* Full-width layout */
.gradio-container {
max-width: none;
width: 100%;
margin: 0;
padding: 0.5rem 1rem 1rem;
box-sizing: border-box;
}
/* Header styling */
#app-header {
margin-bottom: 0.5rem;
}
#app-header h2 {
margin: 0 0 0.25rem 0;
font-size: 1.5rem;
}
#app-header p {
margin: 0;
opacity: 0.85;
}
/* Main layout: controls left, viewer right (larger) */
#main-row {
gap: 1rem;
align-items: stretch;
}
/* Left panel: controls */
#controls-panel {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
#controls-panel .input-image-container {
flex: 1;
min-height: 200px;
}
#input-image {
width: 100%;
}
#input-image img {
width: 100%;
height: auto;
max-height: 280px;
object-fit: contain;
}
/* Options row */
#options-row {
gap: 0.5rem;
}
#options-row > div {
flex: 1;
}
/* Action buttons */
#actions-row {
gap: 0.5rem;
}
#actions-row button {
flex: 1;
min-height: 42px;
}
/* Downloads row */
#downloads-row {
gap: 0.5rem;
align-items: center;
}
#downloads-row > div {
flex: 1;
}
/* Right panel: 3D viewer (dominant) */
#viewer-panel {
display: flex;
flex-direction: column;
min-height: 500px;
}
#viewer-container {
flex: 1;
display: flex;
flex-direction: column;
min-height: 0;
}
/* Viewer iframe/placeholder */
#viewer-html {
flex: 1;
min-height: 500px;
}
#viewer-html iframe {
width: 100%;
height: 100%;
min-height: 500px;
border: 0;
border-radius: 12px;
overflow: hidden;
background: #000;
}
/* Placeholder styling */
.viewer-placeholder {
width: 100%;
height: 100%;
min-height: 500px;
display: flex;
align-items: center;
justify-content: center;
border: 2px dashed var(--border-color-primary, rgba(127, 127, 127, 0.35));
border-radius: 12px;
background: var(--block-background-fill, rgba(127, 127, 127, 0.05));
color: var(--body-text-color, rgba(255, 255, 255, 0.92));
transition: all 0.3s ease;
}
.viewer-placeholder-inner {
max-width: 400px;
padding: 32px;
text-align: center;
}
.viewer-placeholder-icon {
font-size: 48px;
margin-bottom: 16px;
opacity: 0.6;
}
.viewer-placeholder-title {
font-size: 18px;
font-weight: 600;
margin-bottom: 8px;
}
.viewer-placeholder-desc {
font-size: 14px;
line-height: 1.5;
opacity: 0.75;
}
/* Loading state */
.viewer-loading {
border-color: var(--primary-500);
background: linear-gradient(
135deg,
rgba(255, 102, 0, 0.05) 0%,
rgba(255, 102, 0, 0.1) 100%
);
}
.viewer-loading .viewer-placeholder-icon {
animation: pulse 1.5s ease-in-out infinite;
}
@keyframes pulse {
0%, 100% { opacity: 0.4; transform: scale(1); }
50% { opacity: 0.8; transform: scale(1.05); }
}
/* Status text */
#status-text {
font-size: 13px;
opacity: 0.85;
margin-top: 0.5rem;
}
/* Responsive: stack on small screens */
@media (max-width: 900px) {
#main-row {
flex-direction: column;
}
#controls-panel, #viewer-panel {
min-width: 100% !important;
}
#viewer-html, #viewer-html iframe, .viewer-placeholder {
min-height: 400px;
}
#input-image img {
max-height: 200px;
}
}
"""
def _ensure_dir(path: Path) -> Path:
    """Create ``path`` as a directory if needed and return the same object.

    Missing parent directories are created as well, and an already existing
    directory is not treated as an error.
    """
    mkdir_options = {"parents": True, "exist_ok": True}
    path.mkdir(**mkdir_options)
    return path
# Runs at import time: make sure both served directories exist before the UI
# is built and before any generation writes into OUTPUTS_DIR.
_ensure_dir(OUTPUTS_DIR)
_ensure_dir(VIEWER_DIR)
# -----------------------------------------------------------------------------
# FOV / Focal Length utilities
# -----------------------------------------------------------------------------
def focal_length_to_fov(
    focal_length_mm: float,
    sensor_height_mm: float = SENSOR_HEIGHT_MM,
    sensor_width_mm: float = SENSOR_WIDTH_MM,
) -> float:
    """Return the diagonal field of view in degrees for a focal length in mm.

    The sensor diagonal is ``sqrt(width^2 + height^2)`` and the result is
    ``2 * atan(diagonal / (2 * focal_length))`` converted to degrees. A focal
    length that is zero or negative is replaced by ``DEFAULT_FOCAL_LENGTH_MM``.
    """
    effective_focal_mm = (
        DEFAULT_FOCAL_LENGTH_MM if focal_length_mm <= 0 else focal_length_mm
    )
    sensor_diagonal_mm = math.sqrt(sensor_width_mm**2 + sensor_height_mm**2)
    half_angle_rad = math.atan(sensor_diagonal_mm / (2 * effective_focal_mm))
    return math.degrees(2 * half_angle_rad)
def create_settings_file(focal_length_mm: float, output_stem: str) -> Path:
    """Write a per-generation viewer settings file and return its path.

    Starts from built-in defaults, overlays the ``background``, ``camera`` and
    ``animTracks`` entries of ``DEFAULT_SETTINGS_JSON`` when that file can be
    read and parsed, and always sets ``camera.fov`` to the value derived from
    ``focal_length_mm``.

    Args:
        focal_length_mm: Focal length handed to ``focal_length_to_fov``.
        output_stem: File stem; the file is written as
            ``<output_stem>.settings.json`` inside ``OUTPUTS_DIR``.

    Returns:
        Path of the JSON file that was written.

    Raises:
        OSError: If the settings file cannot be written.
    """
    import logging  # function-scope import: the module itself does not import logging

    log = logging.getLogger(__name__)
    fov = focal_length_to_fov(focal_length_mm)

    # Built-in baseline, used as-is when no usable defaults file is available.
    settings = {
        "camera": {
            "fov": fov,
            "position": [0, 0, 0],
            "target": [0, 0, 0],
            "startAnim": "none",
            "animTrack": "",
        },
        "background": {"color": [0, 0, 0, 0]},
        "animTracks": [],
    }

    # Reading the defaults is best-effort: a missing file is normal, anything
    # else is reported instead of being swallowed silently.
    existing = None
    try:
        existing = json.loads(DEFAULT_SETTINGS_JSON.read_text(encoding="utf-8"))
    except FileNotFoundError:
        pass
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        log.warning("Ignoring unreadable %s: %s", DEFAULT_SETTINGS_JSON, exc)

    if isinstance(existing, dict):
        if "background" in existing:
            settings["background"] = existing["background"]
        camera_overrides = existing.get("camera")
        if isinstance(camera_overrides, dict):
            # File values win over the baseline, but the FOV is always ours.
            settings["camera"] = {**settings["camera"], **camera_overrides, "fov": fov}
        elif camera_overrides is not None:
            log.warning("Ignoring non-object 'camera' in %s", DEFAULT_SETTINGS_JSON)
        if "animTracks" in existing:
            settings["animTracks"] = existing["animTracks"]
    elif existing is not None:
        log.warning("Ignoring %s: top-level value is not an object", DEFAULT_SETTINGS_JSON)

    settings_path = OUTPUTS_DIR / f"{output_stem}.settings.json"
    settings_path.write_text(json.dumps(settings, indent=2), encoding="utf-8")
    return settings_path
# -----------------------------------------------------------------------------
# Validation & file operations
# -----------------------------------------------------------------------------
def _validate_image(image_path: str | None) -> None:
    """Raise a user-facing ``gr.Error`` unless an image path was supplied."""
    if image_path:
        return
    raise gr.Error("Please upload an image first.")
def _generate_output_stem() -> str:
    """Return a fresh file stem of the form ``scene_<epoch-ms>_<8 hex chars>``."""
    epoch_millis = int(time.time() * 1000)
    random_suffix = uuid.uuid4().hex[:8]
    return "_".join(("scene", str(epoch_millis), random_suffix))
# -----------------------------------------------------------------------------
# HTML generators
# -----------------------------------------------------------------------------
def viewer_url_for_output(ply_filename: str, settings_filename: str) -> str:
    """Build the viewer page URL that loads the given PLY and settings files.

    Both files are referenced through the ``/gradio_api/file=`` route under
    ``outputs/`` and passed as the ``content`` and ``settings`` query
    parameters; ``noanim`` is appended as a bare flag.
    """
    file_route = "/gradio_api/file="
    content_param = f"{file_route}outputs/{ply_filename}"
    settings_param = f"{file_route}outputs/{settings_filename}"
    viewer_page = f"{file_route}viewer/index.html"
    return f"{viewer_page}?content={content_param}&settings={settings_param}&noanim"
def viewer_placeholder_html() -> str:
    """Return the static markup shown in the viewer pane before any scene exists."""
    placeholder_markup = """
<div class="viewer-placeholder">
<div class="viewer-placeholder-inner">
<div class="viewer-placeholder-icon">🎨</div>
<div class="viewer-placeholder-title">3D Viewer</div>
<div class="viewer-placeholder-desc">
Upload an image and click <strong>Generate</strong> to create a 3D Gaussian scene.
The interactive viewer will appear here.
</div>
</div>
</div>
"""
    return placeholder_markup
def viewer_loading_html() -> str:
    """Return the loading-state markup, including the ``generation-timer`` element."""
    loading_markup = """
<div class="viewer-placeholder viewer-loading">
<div class="viewer-placeholder-inner">
<div class="viewer-placeholder-icon">⚡</div>
<div class="viewer-placeholder-title">Generating 3D Scene...</div>
<div class="viewer-placeholder-desc">
Running SHARP model inference. This may take a moment.
</div>
<div id="generation-timer" style="font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 28px; font-weight: 600; color: var(--primary-500); margin-top: 16px;">0s</div>
</div>
</div>
"""
    return loading_markup
def viewer_iframe_html(ply_filename: str, settings_filename: str) -> str:
    """Return the ``<iframe>`` markup that embeds the viewer for one generation.

    Args:
        ply_filename: Name of the PLY file, as accepted by
            ``viewer_url_for_output``.
        settings_filename: Name of the matching settings JSON file.

    Returns:
        HTML snippet whose ``src`` attribute holds the viewer URL.
    """
    import html  # function-scope import: the module itself does not import html

    # The URL contains raw '&' separators (and could contain quotes if a
    # filename ever did); escape it so it is a well-formed attribute value.
    # The browser decodes it back to the same URL.
    src = html.escape(viewer_url_for_output(ply_filename, settings_filename), quote=True)
    return f"""
<iframe
src="{src}"
allow="xr-spatial-tracking; fullscreen"
referrerpolicy="no-referrer"
loading="eager"
></iframe>
"""
# -----------------------------------------------------------------------------
# Main inference function
# -----------------------------------------------------------------------------
def run_sharp(
    image_path: str | None,
    focal_length_mm: float,
) -> tuple[object, object, str, object, object, object, str]:
    """Run SHARP inference on one image and prepare every UI output.

    Args:
        image_path: Path of the uploaded image; an empty value is rejected.
        focal_length_mm: Focal length used to derive the viewer FOV.

    Returns:
        ``(ply_download, viewer_html, status, generate_btn, clear_btn,
        open_viewer_btn, current_viewer_url)``.

    Raises:
        gr.Error: If no image was supplied, or wrapping any other exception
            raised during generation (the original is chained as the cause).
    """
    import shutil  # function-scope import: the module itself does not import shutil

    _validate_image(image_path)
    try:
        output_stem = _generate_output_stem()

        # Inference runs before anything is written to OUTPUTS_DIR, so a failed
        # run does not leave an orphaned settings file behind.
        ply_path = predict_to_ply_gpu(image_path)

        # Give the PLY its unique name inside OUTPUTS_DIR. shutil.move behaves
        # like a rename when possible and falls back to copy + delete otherwise
        # (e.g. when the source lives on another filesystem).
        # NOTE(review): where predict_to_ply_gpu writes is not visible here.
        unique_ply_path = OUTPUTS_DIR / f"{output_stem}.ply"
        shutil.move(str(ply_path), str(unique_ply_path))

        # Settings file with the FOV for this generation.
        settings_path = create_settings_file(focal_length_mm, output_stem)

        fov = focal_length_to_fov(focal_length_mm)
        status = f"✓ Generated **{unique_ply_path.name}** | FOV: {fov:.1f}°"
        viewer_url = viewer_url_for_output(unique_ply_path.name, settings_path.name)
        return (
            gr.update(value=str(unique_ply_path), visible=True, interactive=True),
            viewer_iframe_html(unique_ply_path.name, settings_path.name),
            status,
            gr.update(interactive=True, value="Generate"),
            gr.update(interactive=True),
            gr.update(visible=True, interactive=True),
            viewer_url,
        )
    except gr.Error:
        # Already user-facing; pass through untouched.
        raise
    except Exception as e:
        raise gr.Error(f"Generation failed: {type(e).__name__}: {e}") from e
def start_generation() -> tuple[str, object, object]:
    """Switch the UI into its busy state at the start of a generation.

    Returns: (viewer_html, generate_btn, clear_btn) -- the loading markup, a
    disabled Generate button relabelled "Generating...", and a disabled Clear
    button.
    """
    loading_markup = viewer_loading_html()
    busy_generate_btn = gr.update(interactive=False, value="Generating...")
    locked_clear_btn = gr.update(interactive=False)
    return loading_markup, busy_generate_btn, locked_clear_btn
def clear_all() -> tuple:
    """Reset every input and output to its initial state.

    Returns: (image, ply_download, viewer_html, status, generate_btn,
    clear_btn, open_viewer_btn, current_viewer_url)
    """
    cleared_image = None
    hidden_download = gr.update(value=None, visible=False)
    placeholder_markup = viewer_placeholder_html()
    empty_status = ""
    idle_generate_btn = gr.update(interactive=True, value="Generate")
    disabled_clear_btn = gr.update(interactive=False)
    hidden_open_viewer_btn = gr.update(visible=False)
    empty_viewer_url = ""
    return (
        cleared_image,
        hidden_download,
        placeholder_markup,
        empty_status,
        idle_generate_btn,
        disabled_clear_btn,
        hidden_open_viewer_btn,
        empty_viewer_url,
    )
def on_image_change(image_path: str | None) -> tuple[object, object]:
    """Enable or disable the action buttons when the image is set or removed.

    Returns: (generate_btn, clear_btn) -- both interactive only while an image
    is present; the Generate label is reset to "Generate".
    """
    buttons_enabled = True if image_path else False
    generate_update = gr.update(interactive=buttons_enabled, value="Generate")
    clear_update = gr.update(interactive=buttons_enabled)
    return generate_update, clear_update
# -----------------------------------------------------------------------------
# UI
# -----------------------------------------------------------------------------
# Global JS for timer control and viewer URL (injected via head parameter).
# - window.sharpTimer.start(): restarts a 500 ms interval that writes the
#   elapsed time into the element with id "generation-timer" ("12s", or
#   "m:ss" once a minute has passed); ticks are skipped while that element
#   is absent.
# - window.sharpTimer.stop(): clears the interval if one is running.
# - window.openSharpViewer(): opens the src of the iframe inside #viewer-html
#   in a new tab, if such an iframe exists.
HEAD_JS: Final[str] = """
<script>
window.sharpTimer = {
interval: null,
start: function() {
this.stop();
var startTime = Date.now();
this.interval = setInterval(function() {
var el = document.getElementById('generation-timer');
if (!el) return;
var secs = Math.floor((Date.now() - startTime) / 1000);
var mins = Math.floor(secs / 60);
secs = secs % 60;
el.textContent = mins > 0 ? mins + ':' + (secs < 10 ? '0' : '') + secs : secs + 's';
}, 500);
},
stop: function() {
if (this.interval) {
clearInterval(this.interval);
this.interval = null;
}
}
};
window.openSharpViewer = function() {
var iframe = document.querySelector('#viewer-html iframe');
if (iframe && iframe.src) {
window.open(iframe.src, '_blank');
}
};
</script>
"""
def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire up its event handlers.

    Layout: a header, a main row with the controls column (image upload,
    focal-length slider, Generate/Clear buttons, download/open-viewer buttons,
    status text) next to the viewer column, and a collapsible About section.

    Returns:
        The configured ``gr.Blocks`` instance with its queue enabled.
    """

    def _restore_after_run(
        image_path: str | None,
        viewer_markup: object,
    ) -> tuple[object, object, object]:
        """Leave the busy state if the run ended without replacing it.

        Returns: (viewer_html, generate_btn, clear_btn)
        """
        # run_sharp replaces the loading markup with the iframe on success, so
        # the loading class still being present means the run raised and none
        # of its outputs were applied.
        still_loading = isinstance(viewer_markup, str) and "viewer-loading" in viewer_markup
        viewer_update = viewer_placeholder_html() if still_loading else gr.update()
        return (viewer_update, *on_image_change(image_path))

    with gr.Blocks(
        title="SHARP • Single-Image 3D Gaussian Prediction",
        elem_id="sharp-root",
    ) as demo:
        # Hidden textbox to store viewer URL (State doesn't work well with js param)
        current_viewer_url = gr.Textbox(value="", visible=False, elem_id="viewer-url-store")

        # Header
        with gr.Column(elem_id="app-header"):
            gr.Markdown("## SHARP")
            gr.Markdown("Single-image **3D Gaussian scene** prediction")

        # Main layout: controls (left, narrow) + viewer (right, wide)
        with gr.Row(elem_id="main-row", equal_height=True):
            # Left column: Controls
            with gr.Column(scale=3, min_width=280, elem_id="controls-panel"):
                # Image upload
                image_in = gr.Image(
                    label="Input Image",
                    type="filepath",
                    sources=["upload"],
                    elem_id="input-image",
                    show_label=True,
                )
                # Options
                with gr.Row(elem_id="options-row"):
                    focal_length = gr.Slider(
                        label="Focal Length (mm)",
                        minimum=12,
                        maximum=200,
                        step=1,
                        value=DEFAULT_FOCAL_LENGTH_MM,
                        info="Affects viewer FOV",
                    )
                # Action buttons (both disabled until an image is present)
                with gr.Row(elem_id="actions-row"):
                    generate_btn = gr.Button(
                        "Generate",
                        variant="primary",
                        interactive=False,
                        elem_id="generate-btn",
                    )
                    clear_btn = gr.Button(
                        "Clear",
                        variant="secondary",
                        interactive=False,
                        elem_id="clear-btn",
                    )
                # Downloads (hidden until a generation succeeds)
                with gr.Row(elem_id="downloads-row"):
                    ply_download = gr.DownloadButton(
                        label="Download PLY",
                        value=None,
                        visible=False,
                        elem_id="ply-download",
                    )
                    open_viewer_btn = gr.Button(
                        "Open Viewer in New Tab ↗",
                        size="sm",
                        visible=False,
                        elem_id="open-viewer-btn",
                    )
                # Status
                status_md = gr.Markdown("", elem_id="status-text")

            # Right column: 3D Viewer (dominant)
            with gr.Column(scale=7, min_width=400, elem_id="viewer-panel"):
                viewer_html = gr.HTML(
                    value=viewer_placeholder_html(),
                    elem_id="viewer-html",
                    label="3D Viewer",
                )

        # About section (collapsible)
        with gr.Accordion("About", open=False):
            gr.Markdown("""
### SHARP Model
**Sharp Monocular View Synthesis in Less Than a Second** (Apple, 2025)
SHARP predicts a 3D Gaussian splatting scene from a single image, enabling novel view synthesis.
```bibtex
@inproceedings{Sharp2025:arxiv,
title = {Sharp Monocular View Synthesis in Less Than a Second},
author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Amaël Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun},
journal = {arXiv preprint arXiv:2512.10685},
year = {2025},
}
```
### 3D Viewer
Powered by [SuperSplat Viewer](https://github.com/playcanvas/supersplat-viewer) by PlayCanvas.
""".strip())

        # --- Event handlers ---
        # Image change: enable/disable buttons
        image_in.change(
            fn=on_image_change,
            inputs=[image_in],
            outputs=[generate_btn, clear_btn],
            queue=False,
            show_progress="hidden",
        )

        # Generate: show loading state, run inference, stop the timer, then
        # restore the controls. `.then()` steps run even when the previous step
        # raised, so the last step is what un-sticks the UI after a failed run
        # (otherwise the buttons would stay disabled on "Generating...").
        generate_btn.click(
            fn=start_generation,
            outputs=[viewer_html, generate_btn, clear_btn],
            queue=False,
            show_progress="hidden",
            js="() => { window.sharpTimer && window.sharpTimer.start(); }",
        ).then(
            fn=run_sharp,
            inputs=[image_in, focal_length],
            outputs=[
                ply_download,
                viewer_html,
                status_md,
                generate_btn,
                clear_btn,
                open_viewer_btn,
                current_viewer_url,
            ],
            show_progress="hidden",
        ).then(
            fn=lambda: None,
            js="() => { window.sharpTimer && window.sharpTimer.stop(); }",
        ).then(
            fn=_restore_after_run,
            inputs=[image_in, viewer_html],
            outputs=[viewer_html, generate_btn, clear_btn],
            queue=False,
            show_progress="hidden",
        )

        # Clear
        clear_btn.click(
            fn=clear_all,
            outputs=[
                image_in,
                ply_download,
                viewer_html,
                status_md,
                generate_btn,
                clear_btn,
                open_viewer_btn,
                current_viewer_url,
            ],
            queue=False,
            show_progress="hidden",
        )

        # Open viewer in new tab (client-side only; reads the iframe's src)
        open_viewer_btn.click(
            fn=None,
            js="() => { window.openSharpViewer(); }",
        )

    demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1)
    return demo
# Built at import time so `demo` exists both for importers of this module and
# for direct execution below.
demo = build_demo()

if __name__ == "__main__":
    # Theme, stylesheet and the <head> script are supplied at launch time.
    demo.launch(theme=THEME, css=CSS, head=HEAD_JS)