ronedgecomb committed on
Commit a1b6914 · verified · 1 parent: e279842

Initial commit

Files changed (34)
  1. .gitattributes +38 -35
  2. .gitignore +219 -0
  3. .python-version +1 -0
  4. README.md +17 -12
  5. app.py +500 -0
  6. assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.jpg +3 -0
  7. assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.mp4 +3 -0
  8. assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.ply +3 -0
  9. assets/examples/ETH3D_courtyard_00000_0000-0001.jpg +3 -0
  10. assets/examples/ETH3D_courtyard_00000_0000-0001.mp4 +3 -0
  11. assets/examples/ETH3D_courtyard_00000_0000-0001.ply +3 -0
  12. assets/examples/Middlebury_49b2bcfdd9_000_0000-0001.jpg +3 -0
  13. assets/examples/Middlebury_49b2bcfdd9_000_0000-0001.mp4 +3 -0
  14. assets/examples/Middlebury_49b2bcfdd9_000_0000-0001.ply +3 -0
  15. assets/examples/ScanNetPP_09c1414f1b_00000_0000-0001.jpg +3 -0
  16. assets/examples/ScanNetPP_09c1414f1b_00000_0000-0001.mp4 +3 -0
  17. assets/examples/ScanNetPP_09c1414f1b_00000_0000-0001.ply +3 -0
  18. assets/examples/TanksAndTemples_Church_00022_0000-0002.jpg +3 -0
  19. assets/examples/TanksAndTemples_Church_00022_0000-0002.mp4 +3 -0
  20. assets/examples/TanksAndTemples_Church_00022_0000-0002.ply +3 -0
  21. assets/examples/Unsplash_-591oIJnyEQ_0000-0001.jpg +3 -0
  22. assets/examples/Unsplash_-591oIJnyEQ_0000-0001.mp4 +3 -0
  23. assets/examples/Unsplash_-591oIJnyEQ_0000-0001.ply +3 -0
  24. assets/examples/Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.jpg +3 -0
  25. assets/examples/Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.mp4 +3 -0
  26. assets/examples/Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.ply +3 -0
  27. assets/examples/WildRGBD_TV_scene_000_00028_0000-0002.jpg +0 -0
  28. assets/examples/WildRGBD_TV_scene_000_00028_0000-0002.mp4 +3 -0
  29. assets/examples/WildRGBD_TV_scene_000_00028_0000-0002.ply +3 -0
  30. assets/examples/manifest.json +11 -0
  31. model_utils.py +612 -0
  32. pyproject.toml +23 -0
  33. requirements.txt +6 -0
  34. uv.lock +0 -0
.gitattributes CHANGED
@@ -1,35 +1,38 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.ply filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,219 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ # Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ # poetry.lock
+ # poetry.toml
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ # pdm.lock
+ # pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ # pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # Redis
+ *.rdb
+ *.aof
+ *.pid
+
+ # RabbitMQ
+ mnesia/
+ rabbitmq/
+ rabbitmq-data/
+
+ # ActiveMQ
+ activemq-data/
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ # .idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
+
+ # Streamlit
+ .streamlit/secrets.toml
+
+ # Kilo Code
+ .kilocode/
.python-version ADDED
@@ -0,0 +1 @@
+ 3.13
README.md CHANGED
@@ -1,12 +1,17 @@
- ---
- title: Ml Sharp
- emoji: 🐠
- colorFrom: green
- colorTo: purple
- sdk: gradio
- sdk_version: 6.1.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: "SHARP - 3D Gaussian Scene Prediction"
+ emoji: 🔪
+ colorFrom: purple
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 6.1.0
+ python_version: 3.13
+ app_file: app.py
+ pinned: false
+ short_description: "Sharp Monocular View Synthesis in Less Than a Second"
+ models:
+   - apple/Sharp
+ startup_duration_timeout: 1h
+ preload_from_hub:
+   - apple/Sharp sharp_2572gikvuh.pt
+ ---
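Editor's note (not part of the commit): the `preload_from_hub` entry above means the checkpoint should already be in the local Hugging Face cache when the Space boots, which is exactly the path `model_utils.py` tries first. A minimal sketch of checking that at runtime, assuming `huggingface_hub` is installed:

```python
from huggingface_hub import try_to_load_from_cache

# Look up the preloaded checkpoint without touching the network.
# Returns a filesystem path (str) if cached, otherwise a sentinel/None.
cached = try_to_load_from_cache(
    repo_id="apple/Sharp",
    filename="sharp_2572gikvuh.pt",
    repo_type="model",
)
if isinstance(cached, str):
    print(f"checkpoint preloaded at: {cached}")
else:
    print("checkpoint not cached; model_utils.py will fall back to a download")
```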
app.py ADDED
@@ -0,0 +1,500 @@
+ """SHARP Gradio demo (minimal, responsive UI).
+
+ This Space:
+ - Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image.
+ - Exports a canonical `.ply` file for download.
+ - Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only).
+
+ Precompiled examples
+     Place precompiled examples under `assets/examples/`.
+
+     Recommended structure (matching stem):
+         assets/examples/<name>.jpg|png|webp
+         assets/examples/<name>.mp4
+         assets/examples/<name>.ply
+
+     Optional manifest (assets/examples/manifest.json):
+         [
+           {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+           ...
+         ]
+ """
+
+ from __future__ import annotations
+
+ import json
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Final
+
+ import gradio as gr
+
+ from model_utils import TrajectoryType, predict_and_maybe_render_gpu
+
+ # -----------------------------------------------------------------------------
+ # Paths & constants
+ # -----------------------------------------------------------------------------
+
+ APP_DIR: Final[Path] = Path(__file__).resolve().parent
+ OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
+ ASSETS_DIR: Final[Path] = APP_DIR / "assets"
+ EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples"
+
+ IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp")
+ DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32
+
+ THEME: Final = gr.themes.Soft(
+     primary_hue="indigo",
+     secondary_hue="blue",
+     neutral_hue="slate",
+ )
+
+ CSS: Final[str] = """
+ /* Keep layout stable when scrollbars appear/disappear */
+ html { scrollbar-gutter: stable; }
+
+ /* Use normal document flow (no fixed-height viewport shell) */
+ html, body { height: auto; }
+ body { overflow: auto; }
+
+ /* Comfortable max width; still fills small screens */
+ .gradio-container {
+   max-width: 1400px;
+   margin: 0 auto;
+   padding: 0.75rem 1rem 1rem;
+   box-sizing: border-box;
+ }
+
+ /* Make media components responsive without stretching */
+ #run-image, #run-video,
+ #examples-image, #examples-video {
+   width: 100%;
+ }
+
+ /* Keep aspect ratio and prevent runaway vertical growth on tall viewports */
+ #run-image img, #examples-image img {
+   width: 100%;
+   height: auto;
+   max-height: 70vh;
+   object-fit: contain;
+ }
+ #run-video video, #examples-video video {
+   width: 100%;
+   height: auto;
+   max-height: 70vh;
+   object-fit: contain;
+ }
+
+ /* On very small screens, reduce max media height a bit */
+ @media (max-width: 640px) {
+   #run-image img, #examples-image img,
+   #run-video video, #examples-video video {
+     max-height: 55vh;
+   }
+ }
+
+ /* Reduce extra whitespace in markdown blocks */
+ .gr-markdown > :first-child { margin-top: 0 !important; }
+ .gr-markdown > :last-child { margin-bottom: 0 !important; }
+ """
+
+ # -----------------------------------------------------------------------------
+ # Helpers
+ # -----------------------------------------------------------------------------
+
+
+ def _ensure_dir(path: Path) -> Path:
+     path.mkdir(parents=True, exist_ok=True)
+     return path
+
+
+ @dataclass(frozen=True, slots=True)
+ class ExampleSpec:
+     """A precompiled example bundle (image + optional mp4 + optional ply)."""
+
+     label: str
+     image: Path
+     video: Path | None
+     ply: Path | None
+
+
+ def _normalize_key(path: str) -> str:
+     """Normalize a path-like string for stable dictionary keys."""
+     try:
+         return str(Path(path).resolve())
+     except Exception:
+         return path
+
+
+ def _load_manifest(manifest_path: Path) -> list[dict]:
+     """Load manifest.json if present; return an empty list on errors."""
+     try:
+         data = json.loads(manifest_path.read_text(encoding="utf-8"))
+         if not isinstance(data, list):
+             raise ValueError("manifest.json must contain a JSON list.")
+         return [x for x in data if isinstance(x, dict)]
+     except FileNotFoundError:
+         return []
+     except Exception as e:
+         # Manifest errors should not crash the app.
+         print(f"[examples] Failed to parse manifest.json: {type(e).__name__}: {e}")
+         return []
+
+
+ def discover_examples(examples_dir: Path) -> list[ExampleSpec]:
+     """Discover example bundles under assets/examples/."""
+     _ensure_dir(examples_dir)
+
+     manifest_rows = _load_manifest(examples_dir / "manifest.json")
+     if manifest_rows:
+         specs: list[ExampleSpec] = []
+         for row in manifest_rows:
+             label = str(row.get("label") or "Example").strip() or "Example"
+             image_rel = row.get("image")
+             if not image_rel:
+                 continue
+
+             image = (examples_dir / str(image_rel)).resolve()
+             if not image.exists():
+                 continue
+
+             video = None
+             ply = None
+             if row.get("video"):
+                 v = (examples_dir / str(row["video"])).resolve()
+                 if v.exists():
+                     video = v
+             if row.get("ply"):
+                 p = (examples_dir / str(row["ply"])).resolve()
+                 if p.exists():
+                     ply = p
+
+             specs.append(ExampleSpec(label=label, image=image, video=video, ply=ply))
+         return specs
+
+     # Fallback: infer bundles by filename stem
+     images: list[Path] = []
+     for ext in IMAGE_EXTS:
+         images.extend(sorted(examples_dir.glob(f"*{ext}")))
+
+     specs = []
+     for img in images:
+         stem = img.stem
+         video = examples_dir / f"{stem}.mp4"
+         ply = examples_dir / f"{stem}.ply"
+         specs.append(
+             ExampleSpec(
+                 label=stem.replace("_", " ").strip() or stem,
+                 image=img.resolve(),
+                 video=video.resolve() if video.exists() else None,
+                 ply=ply.resolve() if ply.exists() else None,
+             )
+         )
+     return specs
+
+
+ _ensure_dir(OUTPUTS_DIR)
+
+ EXAMPLE_SPECS: Final[list[ExampleSpec]] = discover_examples(EXAMPLES_DIR)
+ EXAMPLE_INDEX_BY_PATH: Final[dict[str, ExampleSpec]] = {
+     _normalize_key(str(s.image)): s for s in EXAMPLE_SPECS
+ }
+ EXAMPLE_INDEX_BY_NAME: Final[dict[str, ExampleSpec]] = {
+     s.image.name: s for s in EXAMPLE_SPECS
+ }
+
+
+ def load_example_assets(
+     image_path: str | None,
+ ) -> tuple[str | None, str | None, str | None, str]:
+     """Return (image, video, ply_path, status) for the selected example image."""
+     if not image_path:
+         return None, None, None, "No example selected."
+
+     spec = EXAMPLE_INDEX_BY_PATH.get(_normalize_key(image_path))
+     if spec is None:
+         spec = EXAMPLE_INDEX_BY_NAME.get(Path(image_path).name)
+
+     if spec is None:
+         return image_path, None, None, "No matching example bundle found."
+
+     video = str(spec.video) if spec.video is not None else None
+     ply_path = str(spec.ply) if spec.ply is not None else None
+
+     missing: list[str] = []
+     if video is None:
+         missing.append("MP4")
+     if ply_path is None:
+         missing.append("PLY")
+
+     msg = f"Loaded example: **{spec.label}**."
+     if missing:
+         msg += f" Missing: {', '.join(missing)}."
+
+     return str(spec.image), video, ply_path, msg
+
+
+ def _validate_image(image_path: str | None) -> None:
+     if not image_path:
+         raise gr.Error("Upload an image first.")
+
+
+ def run_sharp(
+     image_path: str | None,
+     trajectory_type: TrajectoryType,
+     output_long_side: int,
+     num_frames: int,
+     fps: int,
+     render_video: bool,
+ ) -> tuple[str | None, str | None, str]:
+     """Run SHARP inference and return (video_path, ply_path, status_markdown)."""
+     _validate_image(image_path)
+     out_long_side: int | None = (
+         None if int(output_long_side) <= 0 else int(output_long_side)
+     )
+
+     try:
+         video_path, ply_path = predict_and_maybe_render_gpu(
+             image_path,
+             trajectory_type=trajectory_type,
+             num_frames=int(num_frames),
+             fps=int(fps),
+             output_long_side=out_long_side,
+             render_video=bool(render_video),
+         )
+
+         lines: list[str] = [f"**PLY:** `{ply_path.name}` (ready to download)"]
+         if render_video:
+             if video_path is None:
+                 lines.append("**Video:** not rendered (CUDA unavailable).")
+             else:
+                 lines.append(f"**Video:** `{video_path.name}`")
+         else:
+             lines.append("**Video:** disabled.")
+
+         return (
+             str(video_path) if video_path is not None else None,
+             str(ply_path),
+             "\n".join(lines),
+         )
+     except gr.Error:
+         raise
+     except Exception as e:
+         raise gr.Error(f"SHARP failed: {type(e).__name__}: {e}") from e
+
+
+ # -----------------------------------------------------------------------------
+ # UI
+ # -----------------------------------------------------------------------------
+
+
+ def build_demo() -> gr.Blocks:
+     with gr.Blocks(
+         title="SHARP • Single-Image 3D Gaussian Prediction",
+         elem_id="sharp-root",
+         fill_height=True, theme=THEME, css=CSS,  # theme/css belong on Blocks, not launch()
+     ) as demo:
+         gr.Markdown("## SHARP\nSingle-image **3D Gaussian scene** prediction.")
+
+         # Run tab components are referenced by Examples tab, so keep them in outer scope.
+         with gr.Column(elem_id="tabs-shell"):
+             with gr.Tabs():
+                 with gr.Tab("Run", id="run"):
+                     with gr.Column(elem_id="run-panel"):
+                         with gr.Row(equal_height=True, elem_id="run-media-row"):
+                             with gr.Column(
+                                 scale=5, min_width=360, elem_id="run-left-col"
+                             ):
+                                 image_in = gr.Image(
+                                     label="Input image",
+                                     type="filepath",
+                                     sources=["upload"],
+                                     elem_id="run-image",
+                                 )
+
+                                 with gr.Row():
+                                     trajectory = gr.Dropdown(
+                                         label="Trajectory",
+                                         choices=[
+                                             "swipe",
+                                             "shake",
+                                             "rotate",
+                                             "rotate_forward",
+                                         ],
+                                         value="rotate_forward",
+                                     )
+                                     output_res = gr.Dropdown(
+                                         label="Output long side",
+                                         info="0 = match input",
+                                         choices=[
+                                             ("Match input", 0),
+                                             ("512", 512),
+                                             ("768", 768),
+                                             ("1024", 1024),
+                                             ("1280", 1280),
+                                             ("1536", 1536),
+                                         ],
+                                         value=0,
+                                     )
+
+                                 with gr.Row():
+                                     frames = gr.Slider(
+                                         label="Frames",
+                                         minimum=24,
+                                         maximum=120,
+                                         step=1,
+                                         value=60,
+                                     )
+                                     fps_in = gr.Slider(
+                                         label="FPS",
+                                         minimum=8,
+                                         maximum=60,
+                                         step=1,
+                                         value=30,
+                                     )
+
+                                 render_toggle = gr.Checkbox(
+                                     label="Render MP4 (CUDA / ZeroGPU only)",
+                                     value=True,
+                                 )
+
+                             with gr.Column(
+                                 scale=5, min_width=360, elem_id="run-right-col"
+                             ):
+                                 video_out = gr.Video(
+                                     label="Trajectory video (MP4)",
+                                     elem_id="run-video",
+                                 )
+                                 with gr.Row(elem_id="run-download-row"):
+                                     ply_download = gr.DownloadButton(
+                                         label="Download PLY (.ply)",
+                                         value=None,
+                                         visible=True,
+                                         elem_id="run-ply-download",
+                                     )
+                                 status_md = gr.Markdown("", elem_id="run-status")
+
+                         with gr.Row(elem_id="run-actions-row"):
+                             run_btn = gr.Button("Generate", variant="primary")
+                             clear_btn = gr.ClearButton(
+                                 [image_in, video_out, ply_download, status_md],
+                                 value="Clear",
+                             )
+
+                         # Ensure clearing also clears any previous download target.
+                         clear_btn.click(
+                             fn=lambda: None,
+                             outputs=[ply_download],
+                             queue=False,
+                         )
+
+                         run_btn.click(
+                             fn=run_sharp,
+                             inputs=[
+                                 image_in,
+                                 trajectory,
+                                 output_res,
+                                 frames,
+                                 fps_in,
+                                 render_toggle,
+                             ],
+                             outputs=[video_out, ply_download, status_md],
+                             api_visibility="public",
+                         )
+
+                 with gr.Tab("Examples", id="examples"):
+                     with gr.Column(elem_id="examples-panel"):
+                         if EXAMPLE_SPECS:
+                             gr.Markdown(
+                                 "Click an example to preview precompiled outputs. "
+                                 "The example image will also be loaded into the Run tab."
+                             )
+
+                             # Define preview outputs first (unrendered), so we can reference them from gr.Examples.
+                             ex_img = gr.Image(
+                                 label="Example image",
+                                 type="filepath",
+                                 interactive=False,
+                                 render=False,
+                                 height=360,
+                                 elem_id="examples-image",
+                             )
+                             ex_vid = gr.Video(
+                                 label="Pre-rendered MP4",
+                                 render=False,
+                                 height=360,
+                                 elem_id="examples-video",
+                             )
+                             ex_ply = gr.DownloadButton(
+                                 label="Download PLY (.ply)",
+                                 value=None,
+                                 visible=True,
+                                 render=False,
+                                 elem_id="examples-ply-download",
+                             )
+                             ex_status = gr.Markdown(
+                                 render=False, elem_id="examples-status"
+                             )
+
+                             with gr.Row(equal_height=True):
+                                 with gr.Column(scale=4, min_width=320):
+                                     gr.Examples(
+                                         examples=[
+                                             [str(s.image)] for s in EXAMPLE_SPECS
+                                         ],
+                                         example_labels=[s.label for s in EXAMPLE_SPECS],
+                                         inputs=[image_in],
+                                         outputs=[ex_img, ex_vid, ex_ply, ex_status],
+                                         fn=load_example_assets,
+                                         cache_examples=False,
+                                         run_on_click=True,
+                                         examples_per_page=10,
+                                         label=None,
+                                     )
+
+                                 with gr.Column(scale=6, min_width=360):
+                                     ex_img.render()
+                                     ex_vid.render()
+                                     ex_ply.render()
+                                     ex_status.render()
+
+                             gr.Markdown(
+                                 "Add example bundles under `assets/examples/` "
+                                 "(image + mp4 + ply) or provide a `manifest.json`."
+                             )
+                         else:
+                             gr.Markdown(
+                                 "No precompiled examples found.\n\n"
+                                 "Add files under `assets/examples/`:\n"
+                                 "- `example.jpg` (or png/webp)\n"
+                                 "- `example.mp4`\n"
+                                 "- `example.ply`\n\n"
+                                 "Optionally add `assets/examples/manifest.json` to define labels and filenames."
+                             )
+
+                 with gr.Tab("About", id="about"):
+                     with gr.Column(elem_id="about-panel"):
+                         gr.Markdown(
+                             """
+ *Sharp Monocular View Synthesis in Less Than a Second* (Apple, 2025)
+
+ ```bibtex
+ @article{Sharp2025:arxiv,
+   title   = {Sharp Monocular View Synthesis in Less Than a Second},
+   author  = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Ama\\"{e}l Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun},
+   journal = {arXiv preprint arXiv:2512.10685},
+   year    = {2025},
+   url     = {https://arxiv.org/abs/2512.10685},
+ }
+ ```
+ """.strip()
+                         )
+
+     demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1)
+     return demo
+
+
+ demo = build_demo()
+
+ if __name__ == "__main__":
+     demo.launch()
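Editor's note (not part of the commit): since the click handler is exposed with `api_visibility="public"`, the Space can also be driven programmatically with `gradio_client`. A hedged sketch follows; the Space id is hypothetical, and `/run_sharp` assumes Gradio's default of naming the endpoint after the handler function:

```python
from gradio_client import Client, handle_file

# Hypothetical Space id; replace with the real <user>/<space> once published.
client = Client("ronedgecomb/ml-sharp")

# Positional args mirror run_sharp(image, trajectory, output_long_side, frames, fps, render).
video_path, ply_path, status = client.predict(
    handle_file("input.jpg"),
    "rotate_forward",
    0,      # 0 = match input resolution
    60,     # frames
    30,     # fps
    True,   # render MP4 (only works on a CUDA / ZeroGPU backend)
    api_name="/run_sharp",  # assumed default endpoint name
)
print(status)
```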
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 819880be5ee569c066aac4f20b5cb08c450c683eda7e188981b8f30bf25cfd72
  • Pointer size: 131 Bytes
  • Size of remote file: 137 kB
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23946e8345738bec5052c11ef259490e8fa003a9f0c87c5cae4b0434d6b0b211
+ size 506496
assets/examples/Booster_train_balanced_Bathroom_camera_00_im0_png_00000_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:54d28194b0ae41fd2a2f09b07de28d2305c5181b0664cd25ce29f9e114ba2ea8
+ size 66061086
assets/examples/ETH3D_courtyard_00000_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 806be6fcaa6064a7a880835e20aafa4b509fa4d2dee42c7b4d58631f0bed1cd5
  • Pointer size: 131 Bytes
  • Size of remote file: 261 kB
assets/examples/ETH3D_courtyard_00000_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5bdbcc32493174b684aff6b2ab0701f4c037e40929991948a379c9d7c323792
+ size 538810
assets/examples/ETH3D_courtyard_00000_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:282fe4719d2822ea66cf3ab02160ec6bc030c7a68bff8849033d679a9d522438
+ size 66061086
assets/examples/Middlebury_49b2bcfdd9_000_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 2859bebdf3948c9959b27dabe50b3b2561a77aa813115d32422fb6b7ba964c9f
  • Pointer size: 131 Bytes
  • Size of remote file: 202 kB
assets/examples/Middlebury_49b2bcfdd9_000_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b44afd2cf1fffdb3a3b00840461b17bbdf44036a6fed85df6fd69da6db6d256
+ size 573636
assets/examples/Middlebury_49b2bcfdd9_000_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:109b98f9262368a69c55bb5771771c887576460efdd1c638924501b3170c35e7
+ size 66061086
assets/examples/ScanNetPP_09c1414f1b_00000_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: c65657e2b5bcbeb82e4ec37110fc72ae966e8becdbc1d52cee7b841889ef7d40
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
assets/examples/ScanNetPP_09c1414f1b_00000_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05b6cc0f5fb874474fe40ae7ce5af4326608c5f71a2bef0be6b9ab9041dec6e8
+ size 449544
assets/examples/ScanNetPP_09c1414f1b_00000_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ac8a893e652a3bce88b6dbface6effaea183d0fe9a8ebf2bc4ce0385738c74a
+ size 66061086
assets/examples/TanksAndTemples_Church_00022_0000-0002.jpg ADDED

Git LFS Details

  • SHA256: 5043ccde7283bf2367911ef5648ba76630ef967d6776107f47c73bb691a7e1e7
  • Pointer size: 131 Bytes
  • Size of remote file: 243 kB
assets/examples/TanksAndTemples_Church_00022_0000-0002.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2270c0185e91fa2222190c5a5546c14b07b806071538c915b2c0aa7fbe6d90be
+ size 592290
assets/examples/TanksAndTemples_Church_00022_0000-0002.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f859a1e39d6fd1d9e69a74718f3872e722744ca61260ad7bac1d5c20ad7401ee
+ size 66061086
assets/examples/Unsplash_-591oIJnyEQ_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 5c6a626653a6aebe761d9ee02fed8942346f49b1b8ecd7d577c2012719469eb0
  • Pointer size: 131 Bytes
  • Size of remote file: 178 kB
assets/examples/Unsplash_-591oIJnyEQ_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:65f9f31b3a4a8a13e50b5133f16805e034582fad81f9125bed64eea5abf9b894
+ size 546457
assets/examples/Unsplash_-591oIJnyEQ_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8593a2fc8f6e60f6bcbca2d4d52510ff972d3206e7259fac0d46a49383398296
+ size 66061086
assets/examples/Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.jpg ADDED

Git LFS Details

  • SHA256: 0a11d10f782c50a0213dc41e5aaae53e8d06a4ceebdfad28476d7b2cb7a0a647
  • Pointer size: 131 Bytes
  • Size of remote file: 233 kB
assets/examples/Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0be43c1aa7d4295c670aef5f4cb99691cd1a66f04dc7cb41ef11f23c72c46b3c
+ size 616789
assets/examples/Unsplash_SharpPaperVideo_-B_lu05yfgE_0000-0001.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0205454aef8b65c7b48d9598cfd4bf963f68e4032e87f00b8fb2da6707965679
+ size 66061086
assets/examples/WildRGBD_TV_scene_000_00028_0000-0002.jpg ADDED
assets/examples/WildRGBD_TV_scene_000_00028_0000-0002.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7745960cbfb1218a5a41fa58f801de922452c53b330230c075981ee83058404
+ size 113720
assets/examples/WildRGBD_TV_scene_000_00028_0000-0002.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bc4951045bdc51ddfdd4f7d874deebc1e99e32f6d29e3caa2c13f805f27aac9
+ size 66061086
assets/examples/manifest.json ADDED
@@ -0,0 +1,11 @@
+ [
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
+   {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"}
+ ]
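Editor's note (not part of the commit): the manifest above is placeholder content; all nine rows point at `desk.jpg`, which is not in this commit, and `discover_examples` in app.py skips rows whose image is missing. A small sketch, assuming it is run from the repo root, that regenerates `manifest.json` from the bundles actually present under `assets/examples/` (JPEG images only here; the app also accepts png/webp):

```python
import json
from pathlib import Path

examples_dir = Path("assets/examples")
rows = []
for img in sorted(examples_dir.glob("*.jpg")):
    row = {"label": img.stem.replace("_", " "), "image": img.name}
    # Attach the pre-rendered video / ply only when the sibling file exists,
    # mirroring the stem-matching fallback in discover_examples().
    if (examples_dir / f"{img.stem}.mp4").exists():
        row["video"] = f"{img.stem}.mp4"
    if (examples_dir / f"{img.stem}.ply").exists():
        row["ply"] = f"{img.stem}.ply"
    rows.append(row)

(examples_dir / "manifest.json").write_text(
    json.dumps(rows, indent=2), encoding="utf-8"
)
```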
model_utils.py ADDED
@@ -0,0 +1,612 @@
+ """SHARP inference + optional CUDA video rendering utilities.
+
+ Design goals:
+ - Reuse SHARP's own predict/render pipeline (no subprocess calls).
+ - Be robust on Hugging Face Spaces + ZeroGPU.
+ - Cache model weights and predictor construction across requests.
+
+ Public API (used by the Gradio app):
+ - TrajectoryType
+ - predict_and_maybe_render_gpu(...)
+ """
+
+ from __future__ import annotations
+
+ import os
+ import threading
+ import time
+ import uuid
+ from contextlib import contextmanager
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Final, Literal
+
+ import torch
+
+ try:
+     import spaces
+ except Exception:  # pragma: no cover
+     spaces = None  # type: ignore[assignment]
+
+ try:
+     # Prefer HF cache / Hub downloads (works with Spaces `preload_from_hub`).
+     from huggingface_hub import hf_hub_download, try_to_load_from_cache
+ except Exception:  # pragma: no cover
+     hf_hub_download = None  # type: ignore[assignment]
+     try_to_load_from_cache = None  # type: ignore[assignment]
+
+ from sharp.cli.predict import DEFAULT_MODEL_URL, predict_image
+ from sharp.cli.render import render_gaussians as sharp_render_gaussians
+ from sharp.models import PredictorParams, create_predictor
+ from sharp.utils import camera, io
+ from sharp.utils.gaussians import Gaussians3D, SceneMetaData, save_ply
+ from sharp.utils.gsplat import GSplatRenderer
+
+ TrajectoryType = Literal["swipe", "shake", "rotate", "rotate_forward"]
+
+ # -----------------------------------------------------------------------------
+ # Helpers
+ # -----------------------------------------------------------------------------
+
+
+ def _now_ms() -> int:
+     return int(time.time() * 1000)
+
+
+ def _ensure_dir(path: Path) -> Path:
+     path.mkdir(parents=True, exist_ok=True)
+     return path
+
+
+ def _make_even(x: int) -> int:
+     return x if x % 2 == 0 else x + 1
+
+
+ def _select_device(preference: str = "auto") -> torch.device:
+     """Select the best available device for inference (CPU/CUDA/MPS)."""
+     if preference not in {"auto", "cpu", "cuda", "mps"}:
+         raise ValueError("device preference must be one of: auto|cpu|cuda|mps")
+
+     if preference == "cpu":
+         return torch.device("cpu")
+     if preference == "cuda":
+         return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     if preference == "mps":
+         return torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+
+     # auto
+     if torch.cuda.is_available():
+         return torch.device("cuda")
+     if torch.backends.mps.is_available():
+         return torch.device("mps")
+     return torch.device("cpu")
+
+
+ # -----------------------------------------------------------------------------
+ # Prediction outputs
+ # -----------------------------------------------------------------------------
+
+
+ @dataclass(frozen=True, slots=True)
+ class PredictionOutputs:
+     """Outputs of SHARP inference (plus derived metadata for rendering)."""
+
+     ply_path: Path
+     gaussians: Gaussians3D
+     metadata_for_render: SceneMetaData
+     input_resolution_hw: tuple[int, int]
+     focal_length_px: float
+
+
+ # -----------------------------------------------------------------------------
+ # Patch SHARP VideoWriter to properly close the optional depth writer
+ # -----------------------------------------------------------------------------
+
+
+ class _PatchedVideoWriter(io.VideoWriter):
+     """Ensure depth writer is closed so files can be safely cleaned up."""
+
+     def __init__(
+         self, output_path: Path, fps: float = 30.0, render_depth: bool = True
+     ) -> None:
+         super().__init__(output_path, fps=fps, render_depth=render_depth)
+         # Ensure attribute exists for downstream code paths.
+         if not hasattr(self, "depth_writer"):
+             self.depth_writer = None  # type: ignore[attribute-defined-outside-init]
+
+     def close(self):
+         super().close()
+         depth_writer = getattr(self, "depth_writer", None)
+         try:
+             if depth_writer is not None:
+                 depth_writer.close()
+         except Exception:
+             pass
+
+
+ @contextmanager
+ def _patched_sharp_videowriter():
+     """Temporarily patch `sharp.utils.io.VideoWriter` used by `sharp.cli.render`."""
+     original = io.VideoWriter
+     io.VideoWriter = _PatchedVideoWriter  # type: ignore[assignment]
+     try:
+         yield
+     finally:
+         io.VideoWriter = original  # type: ignore[assignment]
+
+
+ # -----------------------------------------------------------------------------
+ # Model wrapper
+ # -----------------------------------------------------------------------------
+
+
+ class ModelWrapper:
+     """Cached SHARP model wrapper for Gradio/Spaces."""
+
+     def __init__(
+         self,
+         *,
+         outputs_dir: str | Path = "outputs",
+         checkpoint_url: str = DEFAULT_MODEL_URL,
+         checkpoint_path: str | Path | None = None,
+         device_preference: str = "auto",
+         keep_model_on_device: bool | None = None,
+         hf_repo_id: str | None = None,
+         hf_filename: str | None = None,
+         hf_revision: str | None = None,
+     ) -> None:
+         self.outputs_dir = _ensure_dir(Path(outputs_dir))
+         self.checkpoint_url = checkpoint_url
+
+         env_ckpt = os.getenv("SHARP_CHECKPOINT_PATH") or os.getenv("SHARP_CHECKPOINT")
+         if checkpoint_path:
+             self.checkpoint_path = Path(checkpoint_path)
+         elif env_ckpt:
+             self.checkpoint_path = Path(env_ckpt)
+         else:
+             self.checkpoint_path = None
+
+         # Optional Hugging Face Hub fallback (useful when direct CDN download fails).
+         self.hf_repo_id = hf_repo_id or os.getenv("SHARP_HF_REPO_ID", "apple/Sharp")
+         self.hf_filename = hf_filename or os.getenv(
+             "SHARP_HF_FILENAME", "sharp_2572gikvuh.pt"
+         )
+         self.hf_revision = hf_revision or os.getenv("SHARP_HF_REVISION") or None
+
+         self.device_preference = device_preference
+
+         # For ZeroGPU, it's safer to not keep large tensors on CUDA across calls.
+         if keep_model_on_device is None:
+             keep_env = (
+                 os.getenv("SHARP_KEEP_MODEL_ON_DEVICE")
+             )
+             self.keep_model_on_device = keep_env == "1"
+         else:
+             self.keep_model_on_device = keep_model_on_device
+
+         self._lock = threading.RLock()
+         self._predictor: torch.nn.Module | None = None
+         self._predictor_device: torch.device | None = None
+         self._state_dict: dict | None = None
+
+     def has_cuda(self) -> bool:
+         return torch.cuda.is_available()
+
+     def _load_state_dict(self) -> dict:
+         with self._lock:
+             if self._state_dict is not None:
+                 return self._state_dict
+
+             # 1) Explicit local checkpoint path
+             if self.checkpoint_path is not None:
+                 try:
+                     self._state_dict = torch.load(
+                         self.checkpoint_path,
+                         weights_only=True,
+                         map_location="cpu",
+                     )
+                     return self._state_dict
+                 except Exception as e:
+                     raise RuntimeError(
+                         "Failed to load SHARP checkpoint from local path.\n\n"
+                         f"Path:\n  {self.checkpoint_path}\n\n"
+                         f"Original error:\n  {type(e).__name__}: {e}"
+                     ) from e
+
+             # 2) HF cache (no-network): best match for Spaces `preload_from_hub`.
+             hf_cache_error: Exception | None = None
+             if try_to_load_from_cache is not None:
+                 try:
+                     cached = try_to_load_from_cache(
+                         repo_id=self.hf_repo_id,
+                         filename=self.hf_filename,
+                         revision=self.hf_revision,
+                         repo_type="model",
+                     )
+                 except TypeError:
+                     cached = try_to_load_from_cache(self.hf_repo_id, self.hf_filename)  # type: ignore[misc]
+
+                 try:
+                     if isinstance(cached, str) and Path(cached).exists():
+                         self._state_dict = torch.load(
+                             cached, weights_only=True, map_location="cpu"
+                         )
+                         return self._state_dict
+                 except Exception as e:
+                     hf_cache_error = e
+
+             # 3) HF Hub download (reuse cache when available; may download otherwise).
+             hf_error: Exception | None = None
+             if hf_hub_download is not None:
+                 # Attempt "local only" mode if supported (avoids network).
+                 try:
+                     import inspect
+
+                     if "local_files_only" in inspect.signature(hf_hub_download).parameters:
+                         ckpt_path = hf_hub_download(
+                             repo_id=self.hf_repo_id,
+                             filename=self.hf_filename,
+                             revision=self.hf_revision,
+                             local_files_only=True,
+                         )
+                         if Path(ckpt_path).exists():
+                             self._state_dict = torch.load(
+                                 ckpt_path, weights_only=True, map_location="cpu"
+                             )
+                             return self._state_dict
+                 except Exception:
+                     pass
+
+                 try:
+                     ckpt_path = hf_hub_download(
+                         repo_id=self.hf_repo_id,
+                         filename=self.hf_filename,
+                         revision=self.hf_revision,
+                     )
+                     self._state_dict = torch.load(
+                         ckpt_path,
+                         weights_only=True,
+                         map_location="cpu",
+                     )
+                     return self._state_dict
+                 except Exception as e:
+                     hf_error = e
+
+             # 4) Default upstream CDN (torch hub cache). Last resort.
+             url_error: Exception | None = None
+             try:
+                 self._state_dict = torch.hub.load_state_dict_from_url(
+                     self.checkpoint_url,
+                     progress=True,
+                     map_location="cpu",
+                 )
+                 return self._state_dict
+             except Exception as e:
+                 url_error = e
+
+             # If we got here: all options failed.
+             hint_lines = [
+                 "Failed to load SHARP checkpoint.",
+                 "",
+                 "Tried (in order):",
+                 f"  1) HF cache (preload_from_hub): repo_id={self.hf_repo_id}, filename={self.hf_filename}, revision={self.hf_revision or 'None'}",
+                 f"  2) HF Hub download: repo_id={self.hf_repo_id}, filename={self.hf_filename}, revision={self.hf_revision or 'None'}",
+                 f"  3) URL (torch hub): {self.checkpoint_url}",
+                 "",
+                 "If network access is restricted, set a local checkpoint path:",
+                 "  - SHARP_CHECKPOINT_PATH=/path/to/sharp_2572gikvuh.pt",
+                 "",
+                 "Original errors:",
+             ]
+             if try_to_load_from_cache is None:
+                 hint_lines.append("  HF cache: huggingface_hub not installed")
+             elif hf_cache_error is not None:
+                 hint_lines.append(
+                     f"  HF cache: {type(hf_cache_error).__name__}: {hf_cache_error}"
+                 )
+             else:
+                 hint_lines.append("  HF cache: (not found in cache)")
+
+             if hf_hub_download is None:
+                 hint_lines.append("  HF download: huggingface_hub not installed")
+             else:
+                 hint_lines.append(f"  HF download: {type(hf_error).__name__}: {hf_error}")
+
+             hint_lines.append(f"  URL: {type(url_error).__name__}: {url_error}")
+
+             raise RuntimeError("\n".join(hint_lines))
+
+     def _get_predictor(self, device: torch.device) -> torch.nn.Module:
+         with self._lock:
+             if self._predictor is None:
+                 state_dict = self._load_state_dict()
+                 predictor = create_predictor(PredictorParams())
+                 predictor.load_state_dict(state_dict)
+                 predictor.eval()
+                 self._predictor = predictor
+                 self._predictor_device = torch.device("cpu")
+
+             assert self._predictor is not None
+             assert self._predictor_device is not None
+
+             if self._predictor_device != device:
+                 self._predictor.to(device)
+                 self._predictor_device = device
+
+             return self._predictor
+
+     def _maybe_move_model_back_to_cpu(self) -> None:
+         if self.keep_model_on_device:
+             return
+         with self._lock:
+             if self._predictor is not None and self._predictor_device is not None:
+                 if self._predictor_device.type != "cpu":
+                     self._predictor.to("cpu")
+                     self._predictor_device = torch.device("cpu")
+                     if torch.cuda.is_available():
+                         torch.cuda.empty_cache()
+
+     def _make_output_stem(self, input_path: Path) -> str:
+         return f"{input_path.stem}-{_now_ms()}-{uuid.uuid4().hex[:8]}"
+
+     def predict_to_ply(self, image_path: str | Path) -> PredictionOutputs:
+         """Run SHARP inference and export a .ply file."""
+         image_path = Path(image_path)
+         if not image_path.exists():
+             raise FileNotFoundError(f"Image does not exist: {image_path}")
+
+         device = _select_device(self.device_preference)
+         predictor = self._get_predictor(device)
+
+         image_np, _, f_px = io.load_rgb(image_path)
+         height, width = image_np.shape[:2]
+
+         with torch.no_grad():
+             gaussians = predict_image(predictor, image_np, f_px, device)
+
+         stem = self._make_output_stem(image_path)
+         ply_path = self.outputs_dir / f"{stem}.ply"
+
+         # save_ply expects (height, width).
+         save_ply(gaussians, f_px, (height, width), ply_path)
+
+         # SceneMetaData expects (width, height) for resolution.
+         metadata_for_render = SceneMetaData(
+             focal_length_px=float(f_px),
+             resolution_px=(int(width), int(height)),
+             color_space="linearRGB",
+         )
+
+         self._maybe_move_model_back_to_cpu()
+
+         return PredictionOutputs(
+             ply_path=ply_path,
+             gaussians=gaussians,
+             metadata_for_render=metadata_for_render,
+             input_resolution_hw=(int(height), int(width)),
+             focal_length_px=float(f_px),
+         )
+
+     def _render_video_impl(
+         self,
+         *,
+         gaussians: Gaussians3D,
+         metadata: SceneMetaData,
+         output_path: Path,
+         trajectory_type: TrajectoryType,
+         num_frames: int,
+         fps: int,
+         output_long_side: int | None,
+     ) -> Path:
+         if not torch.cuda.is_available():
+             raise RuntimeError("Rendering requires CUDA (gsplat).")
+
+         if num_frames < 2:
+             raise ValueError("num_frames must be >= 2")
+         if fps < 1:
+             raise ValueError("fps must be >= 1")
+
+         # Keep aligned with upstream CLI pipeline where possible.
+         if output_long_side is None and int(fps) == 30:
+             params = camera.TrajectoryParams(
+                 type=trajectory_type,
+                 num_steps=int(num_frames),
+                 num_repeats=1,
+             )
+             with _patched_sharp_videowriter():
+                 sharp_render_gaussians(
+                     gaussians=gaussians,
+                     metadata=metadata,
+                     params=params,
+                     output_path=output_path,
+                 )
+             depth_path = output_path.with_suffix(".depth.mp4")
+             try:
+                 if depth_path.exists():
+                     depth_path.unlink()
+             except Exception:
+                 pass
+             return output_path
+
+         # Adapted pipeline for custom output resolution / FPS.
+         src_w, src_h = metadata.resolution_px
+         src_f = float(metadata.focal_length_px)
+
+         if output_long_side is None:
+             out_w, out_h, out_f = src_w, src_h, src_f
+         else:
+             long_side = max(src_w, src_h)
+             scale = float(output_long_side) / float(long_side)
+             out_w = _make_even(max(2, int(round(src_w * scale))))
+             out_h = _make_even(max(2, int(round(src_h * scale))))
+             out_f = src_f * scale
+
+         traj_params = camera.TrajectoryParams(
+             type=trajectory_type,
+             num_steps=int(num_frames),
+             num_repeats=1,
+         )
+
+         device = torch.device("cuda")
+         gaussians_cuda = gaussians.to(device)
+
+         intrinsics = torch.tensor(
+             [
+                 [out_f, 0.0, (out_w - 1) / 2.0, 0.0],
+                 [0.0, out_f, (out_h - 1) / 2.0, 0.0],
+                 [0.0, 0.0, 1.0, 0.0],
+                 [0.0, 0.0, 0.0, 1.0],
+             ],
+             device=device,
+             dtype=torch.float32,
+         )
+
+         cam_model = camera.create_camera_model(
+             gaussians_cuda,
+             intrinsics,
+             resolution_px=(out_w, out_h),
+             lookat_mode=traj_params.lookat_mode,
+         )
+
+         trajectory = camera.create_eye_trajectory(
+             gaussians_cuda,
+             traj_params,
+             resolution_px=(out_w, out_h),
+             f_px=out_f,
+         )
+
+         renderer = GSplatRenderer(color_space=metadata.color_space)
+
+         # IMPORTANT: Keep render_depth=True (avoids upstream AttributeError).
+         video_writer = _PatchedVideoWriter(output_path, fps=float(fps), render_depth=True)
+
+         for eye_position in trajectory:
+             cam_info = cam_model.compute(eye_position)
+             rendering = renderer(
+                 gaussians_cuda,
+                 extrinsics=cam_info.extrinsics[None].to(device),
+                 intrinsics=cam_info.intrinsics[None].to(device),
+                 image_width=cam_info.width,
+                 image_height=cam_info.height,
+             )
+             color = (rendering.color[0].permute(1, 2, 0) * 255.0).to(dtype=torch.uint8)
+             depth = rendering.depth[0]
+             video_writer.add_frame(color, depth)
+
+         video_writer.close()
+
+         depth_path = output_path.with_suffix(".depth.mp4")
+         try:
+             if depth_path.exists():
+                 depth_path.unlink()
+         except Exception:
+             pass
+
+         return output_path
+
+     def render_video(
+         self,
+         *,
+         gaussians: Gaussians3D,
+         metadata: SceneMetaData,
+         output_stem: str,
+         trajectory_type: TrajectoryType = "rotate_forward",
+         num_frames: int = 60,
+         fps: int = 30,
+         output_long_side: int | None = None,
+     ) -> Path:
+         """Render a camera trajectory as an MP4 (CUDA-only)."""
+         output_path = self.outputs_dir / f"{output_stem}.mp4"
+         return self._render_video_impl(
+             gaussians=gaussians,
+             metadata=metadata,
+             output_path=output_path,
+             trajectory_type=trajectory_type,
+             num_frames=num_frames,
+             fps=fps,
+             output_long_side=output_long_side,
+         )
+
+     def predict_and_maybe_render(
+         self,
+         image_path: str | Path,
+         *,
+         trajectory_type: TrajectoryType,
+         num_frames: int,
+         fps: int,
+         output_long_side: int | None,
+         render_video: bool = True,
+     ) -> tuple[Path | None, Path]:
+         """One-shot helper for the UI: returns (video_path, ply_path)."""
+         pred = self.predict_to_ply(image_path)
+
+         if not render_video:
+             return None, pred.ply_path
+
+         if not torch.cuda.is_available():
+             return None, pred.ply_path
+
+         output_stem = pred.ply_path.with_suffix("").name
+         video_path = self.render_video(
+             gaussians=pred.gaussians,
+             metadata=pred.metadata_for_render,
+             output_stem=output_stem,
+             trajectory_type=trajectory_type,
+             num_frames=num_frames,
+             fps=fps,
+             output_long_side=output_long_side,
+         )
+         return video_path, pred.ply_path
+
+
+ # -----------------------------------------------------------------------------
+ # ZeroGPU entrypoints
+ # -----------------------------------------------------------------------------
+ #
+ # IMPORTANT: Do NOT decorate bound instance methods with `@spaces.GPU` on ZeroGPU.
+ # The wrapper uses multiprocessing queues and pickles args/kwargs. If `self` is
+ # included, Python will try to pickle the whole instance. ModelWrapper contains
+ # a threading.RLock (not pickleable) and the model itself should not be pickled.
+ #
+ # Expose module-level functions that accept only pickleable arguments and
+ # create/cache the ModelWrapper inside the GPU worker process.
+
+ DEFAULT_OUTPUTS_DIR: Final[Path] = _ensure_dir(Path(__file__).resolve().parent / "outputs")
+
+ _GLOBAL_MODEL: ModelWrapper | None = None
+ _GLOBAL_MODEL_INIT_LOCK: Final[threading.Lock] = threading.Lock()
+
+
+ def get_global_model(*, outputs_dir: str | Path = DEFAULT_OUTPUTS_DIR) -> ModelWrapper:
+     global _GLOBAL_MODEL
+     with _GLOBAL_MODEL_INIT_LOCK:
+         if _GLOBAL_MODEL is None:
+             _GLOBAL_MODEL = ModelWrapper(outputs_dir=outputs_dir)
+     return _GLOBAL_MODEL
+
+
+ def predict_and_maybe_render(
+     image_path: str | Path,
+     *,
+     trajectory_type: TrajectoryType,
+     num_frames: int,
+     fps: int,
+     output_long_side: int | None,
+     render_video: bool = True,
+ ) -> tuple[Path | None, Path]:
+     model = get_global_model()
+     return model.predict_and_maybe_render(
+         image_path,
+         trajectory_type=trajectory_type,
+         num_frames=num_frames,
+         fps=fps,
+         output_long_side=output_long_side,
+         render_video=render_video,
+     )
+
+
+ # Export the GPU-wrapped callable (or a no-op wrapper locally).
+ if spaces is not None:
+     predict_and_maybe_render_gpu = spaces.GPU(duration=180)(predict_and_maybe_render)
+ else:  # pragma: no cover
+     predict_and_maybe_render_gpu = predict_and_maybe_render
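Editor's note (not part of the commit): a minimal local-usage sketch of the module's public API, assuming the `sharp` package and a checkpoint are available. On a machine without CUDA the video step is skipped, mirroring the early returns in `predict_and_maybe_render`:

```python
from model_utils import predict_and_maybe_render

# PLY export works on CPU/MPS/CUDA; MP4 rendering requires CUDA (gsplat).
video_path, ply_path = predict_and_maybe_render(
    "input.jpg",
    trajectory_type="rotate_forward",
    num_frames=60,
    fps=30,
    output_long_side=None,  # None = match input resolution
    render_video=True,
)
print(f"PLY: {ply_path}")
print(f"Video: {video_path if video_path else 'not rendered (no CUDA)'}")
```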
pyproject.toml ADDED
@@ -0,0 +1,23 @@
+ [project]
+ name = "ml-sharp"
+ version = "1.0.0"
+ description = "Sharp Monocular View Synthesis in Less Than a Second"
+ readme = "README.md"
+ requires-python = ">=3.13"
+ dependencies = [
+     "gradio==6.1.0",
+     "huggingface-hub>=1.2.3",
+     "sharp",
+     "spaces==0.44.0",
+     "torch>=2.9.1",
+     "torchvision>=0.24.1",
+ ]
+
+ [tool.uv.sources]
+ sharp = { git = "https://github.com/apple/ml-sharp.git", rev = "cdb4ddc6796402bee5487c7312260f2edd8bd5f0" }
+
+ [dependency-groups]
+ dev = [
+     "hf>=1.2.3",
+     "ruff>=0.14.9",
+ ]
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio==6.1.0
+ spaces==0.44.0
+ huggingface_hub>=1.2.3
+ torch
+ torchvision
+ sharp @ git+https://github.com/apple/ml-sharp.git@cdb4ddc6796402bee5487c7312260f2edd8bd5f0
uv.lock ADDED
The diff for this file is too large to render. See raw diff