| """ |
| Shared utilities for time-based feature engineering. |
| |
| Centralises cyclical encodings for hour-of-day and day-of-year so that |
| Preprocessor, ChronosForecaster, and LLMDataEngineer use the same logic. |
| """ |
|
|
| from __future__ import annotations |
|
|
| from typing import Literal |
|
|
| import numpy as np |
| import pandas as pd |
|
|
|
|
def add_cyclical_time_features(
    df: pd.DataFrame,
    timestamp_col: str | None = None,
    index_is_timestamp: bool = False,
    day_period: float = 365.25,
) -> pd.DataFrame:
    """
    Add hour_sin/hour_cos and doy_sin/doy_cos to a copy of a DataFrame.

    The timestamp source is resolved in this order:

    1. ``timestamp_col``, when it is given and present in ``df``. Values are
       parsed with ``pd.to_datetime(..., utc=True)``, so hour and day-of-year
       are taken from the UTC representation.
    2. The index, when ``index_is_timestamp`` is True and the index is a
       ``pd.DatetimeIndex``. The index is used as-is (no timezone conversion).
    3. Otherwise no timestamp source exists and an unmodified copy is returned.

    Parameters
    ----------
    df : DataFrame
        Input data. It is never modified; all work happens on a copy.
    timestamp_col : str or None
        Column containing timestamps; if None (or not a column of ``df``) and
        index_is_timestamp=True, the index is used as the timestamp source.
    index_is_timestamp : bool
        Whether to treat the index as the timestamp source when timestamp_col
        cannot be used.
    day_period : float
        Period for day-of-year cycle (default 365.25).

    Returns
    -------
    DataFrame
        Copy of df with four extra columns: hour_sin, hour_cos, doy_sin,
        doy_cos. If no timestamp source could be resolved, the copy is
        returned without those columns.
    """
    out = df.copy()

    if timestamp_col is not None and timestamp_col in out.columns:
        # Normalise to a DatetimeIndex so both branches expose the same
        # attribute API (.hour/.minute/.dayofyear) below.
        ts = pd.DatetimeIndex(pd.to_datetime(out[timestamp_col], utc=True))
    elif index_is_timestamp and isinstance(out.index, pd.DatetimeIndex):
        # A DatetimeIndex has no ``.dt`` accessor; its fields are read directly.
        ts = out.index
    else:
        # Best-effort: no usable timestamp source, return the plain copy.
        return out

    # Plain float arrays are assigned positionally, so the result does not
    # depend on index alignment (e.g. duplicate index labels).
    minutes = np.asarray(ts.minute, dtype=float)
    hour = np.asarray(ts.hour, dtype=float) + minutes / 60.0  # fractional hour
    doy = np.asarray(ts.dayofyear, dtype=float)

    hour_angle = 2 * np.pi * hour / 24.0
    doy_angle = 2 * np.pi * doy / day_period

    out["hour_sin"] = np.sin(hour_angle)
    out["hour_cos"] = np.cos(hour_angle)
    out["doy_sin"] = np.sin(doy_angle)
    out["doy_cos"] = np.cos(doy_angle)
    return out
|
|
|
|