Spaces:

bosh94
/

chronos2-test-app

Running

App Files Community

bosh94 committed 8 days ago

Commit

f79bf21

verified ·

1 Parent(s): 0411375

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -66

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import numpy as np
 import pandas as pd
 import gradio as gr
@@ -7,34 +8,35 @@ import torch
 from chronos import Chronos2Pipeline
 # =========================
 # Config
 # =========================
-MODEL_ID_DEFAULT = "amazon/chronos-2"
 DATA_DIR = "data"
 # =========================
-# Utils
 # =========================
 def available_test_csv():
     if not os.path.isdir(DATA_DIR):
         return []
-    return sorted(f for f in os.listdir(DATA_DIR) if f.endswith(".csv"))
 def pick_device(ui_choice: str) -> str:
-    if ui_choice.startswith("cuda") and torch.cuda.is_available():
         return "cuda"
     return "cpu"
 _PIPELINE = None
 _PIPELINE_META = {}
 def get_pipeline(model_id: str, device: str):
     global _PIPELINE, _PIPELINE_META
     model_id = (model_id or MODEL_ID_DEFAULT).strip()
@@ -45,44 +47,128 @@ def get_pipeline(model_id: str, device: str):
         or _PIPELINE_META.get("model_id") != model_id
         or _PIPELINE_META.get("device") != device
     ):
-        pipe = Chronos2Pipeline.from_pretrained(
-            model_id,
-            device_map=device,
-        )
-        _PIPELINE = pipe
         _PIPELINE_META = {"model_id": model_id, "device": device}
     return _PIPELINE
 def make_sample_series(n, seed, trend, season_period, season_amp, noise):
     rng = np.random.default_rng(int(seed))
     t = np.arange(int(n))
     y = (
-        trend * t
-        + season_amp * np.sin(2 * np.pi * t / max(1, int(season_period)))
-        + rng.normal(0, noise, size=len(t))
     )
-    if y.min() < 0:
-        y = y - y.min()
     return y.astype(np.float32)
 def load_series_from_csv(path_or_file, column=None):
     df = pd.read_csv(path_or_file)
-    if column is None or column.strip() == "":
         numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
         if not numeric_cols:
-            raise ValueError("Nessuna colonna numerica nel CSV.")
-        column = numeric_cols[0]
-    y = pd.to_numeric(df[column], errors="coerce").dropna().to_numpy()
     if len(y) < 10:
-        raise ValueError("Serie troppo corta (minimo ~10 punti).")
-    return y.astype(np.float32), column
 # =========================
 # Forecast core
@@ -99,59 +185,61 @@ def run_forecast(
     season_amp,
     noise,
     prediction_length,
-    num_samples,
     q_low,
     q_high,
     device_ui,
     model_id,
 ):
-    if q_low >= q_high:
         raise gr.Error("Quantile low deve essere < quantile high.")
     device = pick_device(device_ui)
     pipe = get_pipeline(model_id, device)
-    # -------------------------
-    # Input data
-    # -------------------------
-    if input_mode == "Test CSV" and test_csv_name:
-        path = os.path.join(DATA_DIR, test_csv_name)
-        y, used_col = load_series_from_csv(path, csv_column)
         source = f"Test CSV: {test_csv_name} ({used_col})"
-    elif input_mode == "Upload CSV" and upload_csv is not None:
         y, used_col = load_series_from_csv(upload_csv.name, csv_column)
         source = f"Upload CSV ({used_col})"
-    else:
         y = make_sample_series(n, seed, trend, season_period, season_amp, noise)
         source = "Sample data"
-    # -------------------------
-    # Forecast
-    # -------------------------
-    samples = pipe.predict(
-        inputs=y.tolist(),
         prediction_length=int(prediction_length),
-        num_samples=int(num_samples),
     )
-    samples = np.asarray(samples, dtype=np.float32)
     median = np.quantile(samples, 0.50, axis=0)
-    low = np.quantile(samples, q_low, axis=0)
-    high = np.quantile(samples, q_high, axis=0)
-    # -------------------------
     # Plot
-    # -------------------------
     t_hist = np.arange(len(y))
     t_fcst = np.arange(len(y), len(y) + int(prediction_length))
     fig, ax = plt.subplots(figsize=(10, 4))
     ax.plot(t_hist, y, label="history")
     ax.plot(t_fcst, median, label="forecast (median)")
-    ax.fill_between(t_fcst, low, high, alpha=0.25, label="confidence band")
     ax.axvline(len(y) - 1, linestyle="--", linewidth=1)
     ax.set_title(source)
     ax.set_xlabel("t")
@@ -159,15 +247,13 @@ def run_forecast(
     ax.grid(True, alpha=0.3)
     ax.legend()
-    # -------------------------
-    # Output
-    # -------------------------
     out_df = pd.DataFrame(
         {
             "t": t_fcst,
             "median": median,
-            f"q{q_low:.2f}": low,
-            f"q{q_high:.2f}": high,
         }
     )
@@ -175,22 +261,23 @@ def run_forecast(
     out_df.to_csv(out_path, index=False)
     info = {
-        "model_id": model_id,
         "device": device,
         "source": source,
-        "history_points": len(y),
-        "prediction_length": prediction_length,
-        "num_samples": num_samples,
     }
     return fig, out_df, out_path, info
 # =========================
 # UI
 # =========================
 with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
-    gr.Markdown("# ⏱️ Chronos-2 Forecast Demo")
     with gr.Row():
         input_mode = gr.Radio(
@@ -208,10 +295,10 @@ with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
     with gr.Row():
         test_csv_name = gr.Dropdown(
             choices=available_test_csv(),
-            label="Test CSV disponibili",
         )
         upload_csv = gr.File(label="Upload CSV", file_types=[".csv"])
-        csv_column = gr.Textbox(label="Colonna numerica (opzionale)")
     with gr.Accordion("Sample data settings", open=False):
         n = gr.Slider(60, 600, 220, step=10, label="History length")
@@ -223,7 +310,8 @@ with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
     with gr.Accordion("Forecast settings", open=True):
         prediction_length = gr.Slider(1, 180, 30, step=1, label="Prediction length")
-        num_samples = gr.Slider(20, 400, 200, step=10, label="Num samples")
         q_low = gr.Slider(0.01, 0.49, 0.10, step=0.01, label="Quantile low")
         q_high = gr.Slider(0.51, 0.99, 0.90, step=0.01, label="Quantile high")
@@ -248,7 +336,7 @@ with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
             season_amp,
             noise,
             prediction_length,
-            num_samples,
             q_low,
             q_high,
             device_ui,

 import os
+import inspect
 import numpy as np
 import pandas as pd
 import gradio as gr
 from chronos import Chronos2Pipeline
 # =========================
 # Config
 # =========================
+MODEL_ID_DEFAULT = os.getenv("CHRONOS_MODEL_ID", "amazon/chronos-2")
 DATA_DIR = "data"
 # =========================
+# Helpers: files & device
 # =========================
 def available_test_csv():
     if not os.path.isdir(DATA_DIR):
         return []
+    return sorted(f for f in os.listdir(DATA_DIR) if f.lower().endswith(".csv"))
 def pick_device(ui_choice: str) -> str:
+    if (ui_choice or "").startswith("cuda") and torch.cuda.is_available():
         return "cuda"
     return "cpu"
+# =========================
+# Model cache
+# =========================
 _PIPELINE = None
 _PIPELINE_META = {}
 def get_pipeline(model_id: str, device: str):
+    """
+    Caches the pipeline across calls to avoid re-downloading and re-loading.
+    """
     global _PIPELINE, _PIPELINE_META
     model_id = (model_id or MODEL_ID_DEFAULT).strip()
         or _PIPELINE_META.get("model_id") != model_id
         or _PIPELINE_META.get("device") != device
     ):
+        # Chronos-2 pipeline
+        _PIPELINE = Chronos2Pipeline.from_pretrained(model_id, device_map=device)
         _PIPELINE_META = {"model_id": model_id, "device": device}
     return _PIPELINE
+# =========================
+# Data generation/loading
+# =========================
 def make_sample_series(n, seed, trend, season_period, season_amp, noise):
     rng = np.random.default_rng(int(seed))
     t = np.arange(int(n))
     y = (
+        float(trend) * t
+        + float(season_amp) * np.sin(2 * np.pi * t / max(1, int(season_period)))
+        + rng.normal(0.0, float(noise), size=len(t))
     )
+    # shift up if negative (not required, but keeps nice plots)
+    mn = float(np.min(y))
+    if mn < 0:
+        y = y - mn
     return y.astype(np.float32)
 def load_series_from_csv(path_or_file, column=None):
     df = pd.read_csv(path_or_file)
+    if df.shape[1] == 0:
+        raise ValueError("CSV vuoto o non leggibile.")
+    col = (column or "").strip()
+    if col == "":
         numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
         if not numeric_cols:
+            # try coercion to numeric on all columns (sometimes dtype is object)
+            numeric_cols = []
+            for c in df.columns:
+                coerced = pd.to_numeric(df[c], errors="coerce")
+                if coerced.notna().sum() >= 10:
+                    numeric_cols.append(c)
+            if not numeric_cols:
+                raise ValueError("Nessuna colonna numerica nel CSV. Specifica una colonna con numeri.")
+        col = numeric_cols[0]
+    if col not in df.columns:
+        raise ValueError(f"Colonna '{col}' non trovata. Colonne: {list(df.columns)}")
+    y = pd.to_numeric(df[col], errors="coerce").dropna().to_numpy()
     if len(y) < 10:
+        raise ValueError("Serie troppo corta (minimo ~10 punti dopo dropna).")
+    return y.astype(np.float32), col
+# =========================
+# Chronos2 predict normalization
+# =========================
+def _extract_samples(pred_out):
+    """
+    Chronos2Pipeline.predict may return:
+      - numpy array / list -> samples
+      - dict with 'samples'
+      - object with attribute 'samples'
+    Returns the result converted to np.ndarray without reshaping; the expected
+    shape is (n_draws, pred_len), or (pred_len,) if only one draw.
+    """
+    if isinstance(pred_out, np.ndarray):
+        return pred_out
+    if isinstance(pred_out, list):
+        return np.asarray(pred_out)
+    if isinstance(pred_out, dict):
+        if "samples" in pred_out:
+            return np.asarray(pred_out["samples"])
+        # no "samples" key: fall back to other result keys a version may use
+        for k in ("predictions", "prediction", "outputs"):
+            if k in pred_out:
+                return np.asarray(pred_out[k])
+        return np.asarray(pred_out)
+    # object with samples attribute
+    if hasattr(pred_out, "samples"):
+        return np.asarray(getattr(pred_out, "samples"))
+    # last resort
+    return np.asarray(pred_out)
+def chronos2_predict_samples(pipe, y, prediction_length: int, n_draws: int):
+    """
+    Calls pipe.predict in a robust way across Chronos versions:
+    - Uses `inputs=` (required)
+    - Uses `num_predictions=` if supported
+    - If not supported, falls back to a single prediction and returns shape (1, pred_len)
+    """
+    sig = inspect.signature(pipe.predict)
+    params = sig.parameters
+    kwargs = {"inputs": y.tolist(), "prediction_length": int(prediction_length)}
+    # API differences: some versions accept num_predictions, others not
+    if "num_predictions" in params:
+        kwargs["num_predictions"] = int(n_draws)
+    # Some versions might have different names; try a couple safe fallbacks
+    try:
+        out = pipe.predict(**kwargs)
+    except TypeError as e:
+        # If num_predictions was rejected, retry without it
+        if "num_predictions" in kwargs:
+            kwargs.pop("num_predictions", None)
+            out = pipe.predict(**kwargs)
+        else:
+            raise e
+    samples = _extract_samples(out).astype(np.float32)
+    # Normalize shape: expected (n_draws, pred_len)
+    if samples.ndim == 1:
+        samples = samples[None, :]
+    elif samples.ndim == 2:
+        pass
+    else:
+        # If extra dims, squeeze conservatively
+        samples = np.squeeze(samples)
+        if samples.ndim == 1:
+            samples = samples[None, :]
+    return samples
 # =========================
 # Forecast core
     season_amp,
     noise,
     prediction_length,
+    num_draws,
     q_low,
     q_high,
     device_ui,
     model_id,
 ):
+    # Validate quantiles
+    if float(q_low) >= float(q_high):
         raise gr.Error("Quantile low deve essere < quantile high.")
+    # Device + pipeline
     device = pick_device(device_ui)
     pipe = get_pipeline(model_id, device)
+    # Choose input series
+    if input_mode == "Test CSV":
+        if not test_csv_name:
+            raise gr.Error("Seleziona un file nella dropdown dei Test CSV oppure usa Sample/Upload.")
+        csv_path = os.path.join(DATA_DIR, test_csv_name)
+        if not os.path.exists(csv_path):
+            raise gr.Error(f"Non trovo {csv_path}. Assicurati che esista nel repo dello Space.")
+        y, used_col = load_series_from_csv(csv_path, csv_column)
         source = f"Test CSV: {test_csv_name} ({used_col})"
+    elif input_mode == "Upload CSV":
+        if upload_csv is None:
+            raise gr.Error("Carica un CSV oppure scegli Sample/Test CSV.")
         y, used_col = load_series_from_csv(upload_csv.name, csv_column)
         source = f"Upload CSV ({used_col})"
+    else:  # Sample
         y = make_sample_series(n, seed, trend, season_period, season_amp, noise)
         source = "Sample data"
+    # Forecast samples
+    samples = chronos2_predict_samples(
+        pipe=pipe,
+        y=y,
         prediction_length=int(prediction_length),
+        n_draws=int(num_draws),
     )
+    # Quantiles
     median = np.quantile(samples, 0.50, axis=0)
+    low = np.quantile(samples, float(q_low), axis=0)
+    high = np.quantile(samples, float(q_high), axis=0)
     # Plot
     t_hist = np.arange(len(y))
     t_fcst = np.arange(len(y), len(y) + int(prediction_length))
     fig, ax = plt.subplots(figsize=(10, 4))
     ax.plot(t_hist, y, label="history")
     ax.plot(t_fcst, median, label="forecast (median)")
+    ax.fill_between(t_fcst, low, high, alpha=0.25, label=f"band [{float(q_low):.2f}, {float(q_high):.2f}]")
     ax.axvline(len(y) - 1, linestyle="--", linewidth=1)
     ax.set_title(source)
     ax.set_xlabel("t")
     ax.grid(True, alpha=0.3)
     ax.legend()
+    # Output table + CSV
     out_df = pd.DataFrame(
         {
             "t": t_fcst,
             "median": median,
+            f"q{float(q_low):.2f}": low,
+            f"q{float(q_high):.2f}": high,
         }
     )
     out_df.to_csv(out_path, index=False)
     info = {
+        "model_id": (model_id or MODEL_ID_DEFAULT),
         "device": device,
         "source": source,
+        "history_points": int(len(y)),
+        "prediction_length": int(prediction_length),
+        "requested_draws": int(num_draws),
+        "returned_draws": int(samples.shape[0]),
     }
     return fig, out_df, out_path, info
 # =========================
 # UI
 # =========================
 with gr.Blocks(title="Chronos-2 • HF Spaces Demo") as demo:
+    gr.Markdown("# ⏱️ Chronos-2 Forecast Demo (HF Spaces)\n\n"
+                "Supporta **Sample**, **Test CSV** (da cartella `data/`) e **Upload CSV**.")
     with gr.Row():
         input_mode = gr.Radio(
     with gr.Row():
         test_csv_name = gr.Dropdown(
             choices=available_test_csv(),
+            label="Test CSV disponibili (cartella data/)",
         )
         upload_csv = gr.File(label="Upload CSV", file_types=[".csv"])
+        csv_column = gr.Textbox(label="Colonna numerica (opzionale)", placeholder="es: value")
     with gr.Accordion("Sample data settings", open=False):
         n = gr.Slider(60, 600, 220, step=10, label="History length")
     with gr.Accordion("Forecast settings", open=True):
         prediction_length = gr.Slider(1, 180, 30, step=1, label="Prediction length")
+        # Shown as "Num samples (draws)" in the UI; passed on as the number of prediction draws if the pipeline supports it
+        num_draws = gr.Slider(1, 400, 200, step=10, label="Num samples (draws)")
         q_low = gr.Slider(0.01, 0.49, 0.10, step=0.01, label="Quantile low")
         q_high = gr.Slider(0.51, 0.99, 0.90, step=0.01, label="Quantile high")
             season_amp,
             noise,
             prediction_length,
+            num_draws,
             q_low,
             q_high,
             device_ui,