Update app.py
app.py CHANGED
@@ -21,22 +21,26 @@ def _round_df(df: pd.DataFrame, places: int = 2) -> pd.DataFrame:
     return out
 
 # ---------- Tool 1: Forecast ----------
+from smolagents import tool
+import json, pandas as pd, numpy as np
+
 @tool
-def forecast_tool(horizon_months: int = 1, use_demo: bool = True, history_csv_path: str = "") -> str:
+def forecast_tool(horizon_months: int = 1, use_demo: bool = True, history_csv_path: str = "",
+                  use_covariates: bool = False) -> str:
     """
-    Forecast monthly demand
+    Forecast monthly demand using a GLOBAL N-HiTS model (fast & accurate).
 
     Args:
-        horizon_months (int): Number of future months to forecast (>=1).
-        use_demo (bool): If True,
-        history_csv_path (str): Optional CSV
+        horizon_months (int): Number of future months to forecast (>=1).
+        use_demo (bool): If True, generates synthetic history for FG100/FG200.
+        history_csv_path (str): Optional CSV with columns [product_id,date,qty,(optional extra covariates...)].
+        use_covariates (bool): If True and extra numeric columns exist, use them as past covariates
+                               (for future effects you must provide future values too).
 
     Returns:
-        str: JSON
+        str: JSON list of {"product_id","period_start","forecast_qty"} for the next horizon_months.
     """
-
-
-    # 1) History
+    # --- 1) Load data in long form ---
     if use_demo or not history_csv_path:
         rng = pd.date_range("2023-01-01", periods=24, freq="MS")
         rows = []
@@ -49,22 +53,75 @@ def forecast_tool(horizon_months: int = 1, use_demo: bool = True, history_csv_pa
         df = pd.DataFrame(rows)
     else:
         df = pd.read_csv(history_csv_path)
-        assert {"product_id",
+        assert {"product_id","date","qty"} <= set(df.columns), "CSV must have product_id,date,qty"
         df["date"] = pd.to_datetime(df["date"], errors="coerce")
         df = df.dropna(subset=["date"])
         df["qty"] = pd.to_numeric(df["qty"], errors="coerce").fillna(0.0)
 
-    #
-
-
+    # Ensure proper monthly frequency per SKU
+    df = df.copy()
+    df["product_id"] = df["product_id"].astype(str)
+
+    # --- 2) Build Darts series (GLOBAL model across SKUs) ---
+    from darts import TimeSeries
+    series_list = []
+    past_cov_list = []  # optional
+
+    extra_cols = [c for c in df.columns if c not in ["product_id","date","qty"]]
+    # keep only numeric covariates (categoricals must be pre-encoded)
+    num_covs = [c for c in extra_cols if pd.api.types.is_numeric_dtype(df[c])]
+
     for pid, g in df.groupby("product_id"):
-
-
-
-
-
-
-
+        g = (g.set_index("date")
+              .sort_index()
+              .resample("MS")
+              .agg({**{"qty":"sum"}, **{c:"last" for c in num_covs}})
+              .fillna(method="ffill")
+              .fillna(0.0))
+        y = TimeSeries.from_dataframe(g.reset_index(), time_col="date", value_cols="qty", freq="MS")
+        series_list.append(y)
+
+        if use_covariates and num_covs:
+            pc = TimeSeries.from_dataframe(g.reset_index(), time_col="date", value_cols=num_covs, freq="MS")
+            past_cov_list.append(pc)
+        else:
+            past_cov_list.append(None)
+
+    # --- 3) Train N-HiTS (fast settings) ---
+    from darts.models import NHiTSModel
+
+    H = max(1, int(horizon_months))
+    # keep chunk length small for short histories; model is global
+    input_chunk = max(6, min(12, min(len(s) for s in series_list) - 1)) if series_list else 12
+
+    model = NHiTSModel(
+        input_chunk_length=input_chunk,
+        output_chunk_length=min(H, 3),  # can roll to reach H
+        n_epochs=60,                    # keep fast; tune up if needed
+        batch_size=32,
+        random_state=0,
+        dropout=0.0,
+    )
+
+    if use_covariates and any(pc is not None for pc in past_cov_list):
+        model.fit(series=series_list, past_covariates=past_cov_list, verbose=False)
+    else:
+        model.fit(series=series_list, verbose=False)
+
+    # --- 4) Predict per SKU and return JSON ---
+    out = []
+    for pid, s, pc in zip(df["product_id"].unique(), series_list, past_cov_list):
+        if use_covariates and pc is not None:
+            pred = model.predict(n=H, series=s, past_covariates=pc)
+        else:
+            pred = model.predict(n=H, series=s)
+        for ts, val in zip(pred.time_index, pred.values().flatten()):
+            out.append({
+                "product_id": str(pid),
+                "period_start": pd.Timestamp(ts).strftime("%Y-%m-%d"),
+                "forecast_qty": float(val)
+            })
+
     return json.dumps(out)
 
 # ---------- Tool 2: Optimize (LP) ----------
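For reference, a minimal sketch of how the updated tool could be smoke-tested outside the agent. It assumes app.py imports cleanly without launching the UI and that darts and smolagents are installed; the three-month horizon and the sample_history.csv path are illustrative assumptions, not part of this change.

# Illustrative smoke test (assumption: app.py is importable as a module and the
# smolagents @tool wrapper remains directly callable with keyword arguments).
import json
import pandas as pd
from app import forecast_tool

# Demo mode: synthetic FG100/FG200 history, three months ahead.
raw = forecast_tool(horizon_months=3, use_demo=True)
fc = pd.DataFrame(json.loads(raw))
print(fc)  # columns: product_id, period_start, forecast_qty

# CSV mode (hypothetical file name): the CSV must provide product_id, date, qty;
# extra numeric columns can be used as past covariates.
# raw = forecast_tool(horizon_months=3, use_demo=False,
#                     history_csv_path="sample_history.csv", use_covariates=True)

Parsing the returned JSON into a DataFrame mirrors the Returns section of the docstring: one row per product per forecasted month.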