Spaces:

PD03
/

PurchaseAgent

Build error

App Files Files Community

PD03 commited on Aug 10

Commit

fad5f5f

verified ·

1 Parent(s): 3ffc475

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +451 -36

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,455 @@
-import altair as alt
 import numpy as np
 import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+import os
+import re
+import math
+from datetime import datetime, date
+from dateutil.relativedelta import relativedelta
 import numpy as np
 import pandas as pd
+import plotly.express as px
 import streamlit as st
+# =========================
+# Theming and Page Config
+# =========================
+st.set_page_config(
+    page_title="Procurement Insight Agent",
+    page_icon="🧭",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS for a premium look
+st.markdown("""
+<style>
+/* Global font and spacing */
+html, body, [class*="css"]  {
+  font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+}
+section[data-testid="stSidebar"] {
+  background: linear-gradient(180deg, #0f172a 0%, #111827 100%);
+  border-right: 1px solid #1f2937;
+}
+section[data-testid="stSidebar"] * {
+  color: #e5e7eb !important;
+}
+.block-container {
+  padding-top: 1.5rem;
+}
+.card {
+  background: #0b1220;
+  border: 1px solid #1f2937;
+  border-radius: 14px;
+  padding: 16px;
+}
+.kpi {
+  background: linear-gradient(180deg, #0b1220 0%, #0f172a 100%);
+  border: 1px solid #1f2937;
+  border-radius: 14px;
+  padding: 18px;
+}
+.kpi h3 {
+  margin: 0;
+  color: #93c5fd;
+  font-weight: 600;
+  font-size: 0.95rem;
+}
+.kpi .value {
+  margin-top: 6px;
+  font-size: 1.6rem;
+  font-weight: 700;
+  color: #e5e7eb;
+}
+.caption-note {
+  color: #94a3b8;
+  font-size: 0.85rem;
+}
+.prompt-box {
+  background: #0b1220;
+  border: 1px solid #1f2937;
+  border-radius: 12px;
+  padding: 10px 12px;
+}
+.footer-note {
+  color: #94a3b8;
+  font-size: 0.8rem;
+  text-align: center;
+  margin-top: 24px;
+}
+.stTabs [data-baseweb="tab-list"] {
+  gap: 12px;
+}
+.stTabs [data-baseweb="tab"] {
+  background-color: #0b1220;
+  border-radius: 10px 10px 0 0;
+  padding: 10px 16px;
+  color: #e5e7eb;
+  border: 1px solid #1f2937;
+}
+.stTabs [aria-selected="true"] {
+  background: linear-gradient(180deg, #0b1220 0%, #0f172a 100%) !important;
+  color: #93c5fd !important;
+}
+</style>
+""", unsafe_allow_html=True)
+# =========================
+# Synthetic Data Generator
+# =========================
+@st.cache_data(show_spinner=False)
+def generate_synthetic_procurement(seed=42, start_year=2023, end_year=2025, rows=40_000):
+    """
+    Generates synthetic procurement line items reflecting common S/4HANA Embedded Analytics procurement fields.
+    """
+    rng = np.random.default_rng(seed)
+    months = pd.date_range(f"{start_year}-01-01", f"{end_year}-12-31", freq="MS")
+    purchasing_orgs = ["1000", "2000", "3000", "4000"]
+    company_codes = ["C100", "C200", "C300"]
+    suppliers = [f"SUPP-{i:03d}" for i in range(1, 61)]
+    material_groups = ["RAW", "PACK", "SERV", "CAPEX", "MRO"]
+    currencies = ["USD", "EUR", "GBP", "INR"]
+    # Seasonality and org effects
+    def base_amount(month):
+        # Seasonality: higher in Q2/Q4
+        q = (month.month-1)//3 + 1
+        base = 1.0
+        if q in (2, 4):
+            base *= 1.2
+        if month.month in (11, 12):
+            base *= 1.1
+        return base
+    data = {
+        "CalendarMonth": rng.choice(months, size=rows),
+        "PurchasingOrganization": rng.choice(purchasing_orgs, size=rows, p=[0.35, 0.25, 0.25, 0.15]),
+        "CompanyCode": rng.choice(company_codes, size=rows, p=[0.45, 0.35, 0.20]),
+        "Supplier": rng.choice(suppliers, size=rows),
+        "MaterialGroup": rng.choice(material_groups, size=rows, p=[0.3, 0.2, 0.25, 0.15, 0.10]),
+        "Currency": rng.choice(currencies, size=rows, p=[0.5, 0.25, 0.1, 0.15]),
+    }
+    df = pd.DataFrame(data)
+    df["CalendarYear"] = df["CalendarMonth"].dt.year
+    df["Month"] = df["CalendarMonth"].dt.to_period("M").astype(str)
+    # Amount generation with org/supplier effects
+    org_factor = df["PurchasingOrganization"].map({"1000":1.2, "2000":0.9, "3000":1.0, "4000":0.8})
+    supp_strength = df["Supplier"].str[-3:].astype(int)
+    supp_factor = 0.8 + (supp_strength / 1000.0)  # small lift for higher IDs
+    seasonal = df["CalendarMonth"].apply(base_amount).astype(float)
+    base_val = rng.lognormal(mean=7.5, sigma=0.6, size=rows)  # realistic skew
+    df["NetAmount"] = (base_val * org_factor * supp_factor * seasonal).round(2)
+    # Random off-contract flag
+    df["OffContract"] = rng.choice([True, False], size=rows, p=[0.18, 0.82])
+    # For “service POs”
+    df["IsService"] = df["MaterialGroup"].eq("SERV")
+    return df
+# Load synthetic data
+df_raw = generate_synthetic_procurement()
+# ==================================
+# Helper: Natural Language to Query
+# ==================================
+def parse_prompt(prompt: str):
+    """
+    Very lightweight rules to detect:
+    - metric (po_value, off_contract, service_spend)
+    - time grain (month, quarter, year)
+    - time window (YTD, QTD, last quarter, range)
+    - grouping (Supplier, PurchasingOrganization, CompanyCode, MaterialGroup)
+    - top_n
+    """
+    text = (prompt or "").lower()
+    # Metric
+    if "off-contract" in text or "off contract" in text or "leakage" in text:
+        metric = "off_contract"
+    elif "service" in text:
+        metric = "service_spend"
+    else:
+        metric = "po_value"
+    # Grain
+    if "by month" in text or "monthly" in text:
+        grain = "month"
+    elif "by quarter" in text or "quarterly" in text or "qtr" in text:
+        grain = "quarter"
+    else:
+        grain = "month" if "trend" in text else "year"
+    # Grouping
+    group_map = {
+        "supplier": "Supplier",
+        "purchasing org": "PurchasingOrganization",
+        "purchasing organization": "PurchasingOrganization",
+        "company": "CompanyCode",
+        "companycode": "CompanyCode",
+        "material group": "MaterialGroup",
+        "material": "MaterialGroup",
+    }
+    group_by = None
+    for k, v in group_map.items():
+        if k in text:
+            group_by = v
+            break
+    # Top-N
+    top_n = None
+    m = re.search(r"top\s+(\d+)", text)
+    if m:
+        top_n = int(m.group(1))
+    # Time window
+    today = date.today()
+    year = today.year
+    if "last year" in text or "previous year" in text:
+        start = date(year-1, 1, 1)
+        end = date(year-1, 12, 31)
+    elif "this year" in text or "ytd" in text:
+        start = date(year, 1, 1)
+        end = today
+    elif "last quarter" in text or "previous quarter" in text:
+        this_q = (today.month - 1)//3 + 1
+        last_q_end = date(year, (this_q-1)*3, 1) - relativedelta(days=1) if this_q > 1 else date(year-1, 12, 31)
+        last_q = (last_q_end.month - 1)//3 + 1
+        last_q_start = date(last_q_end.year, (last_q-1)*3 + 1, 1)
+        start, end = last_q_start, last_q_end
+    elif "q" in text and re.search(r"q[1-4]\s*\d{4}", text):
+        qm = re.search(r"q([1-4])\s*(\d{4})", text)
+        q, y = int(qm.group(1)), int(qm.group(2))
+        start = date(y, (q-1)*3 + 1, 1)
+        end = (start + relativedelta(months=3)) - relativedelta(days=1)
+    elif re.search(r"\b20\d{2}\b", text):
+        y = int(re.search(r"\b(20\d{2})\b", text).group(1))
+        start, end = date(y, 1, 1), date(y, 12, 31)
+    else:
+        # Default to this year to keep it demo-friendly
+        start, end = date(year, 1, 1), today
+    return {
+        "metric": metric,
+        "grain": grain,
+        "group_by": group_by,
+        "top_n": top_n,
+        "start": start,
+        "end": end
+    }
+# =========================
+# Query/Compute Functions
+# =========================
+def filter_timeframe(df, start: date, end: date):
+    return df[(df["CalendarMonth"].dt.date >= start) & (df["CalendarMonth"].dt.date <= end)]
+def compute_metric(df, metric: str):
+    if metric == "off_contract":
+        return df[df["OffContract"]]
+    if metric == "service_spend":
+        return df[df["IsService"]]
+    return df
+def group_and_aggregate(df, grain: str, group_by: str | None):
+    work = df.copy()
+    # Derive time buckets
+    work["Year"] = work["CalendarMonth"].dt.year
+    work["Quarter"] = work["CalendarMonth"].dt.to_period("Q").astype(str)
+    work["Month"] = work["CalendarMonth"].dt.to_period("M").astype(str)
+    time_col = {"year":"Year", "quarter":"Quarter", "month":"Month"}[grain]
+    group_cols = [time_col] + ([group_by] if group_by else [])
+    agg = work.groupby(group_cols, dropna=False)["NetAmount"].sum().reset_index()
+    agg = agg.rename(columns={"NetAmount":"TotalAmount"})
+    agg = agg.sort_values("TotalAmount", ascending=False)
+    return agg, time_col
+def topn_if_needed(df, top_n: int | None, group_by: str | None, time_col: str = None):
+    if top_n and group_by:
+        # For time series with group, take top entities over total, then filter
+        total_by_entity = df.groupby(group_by)["TotalAmount"].sum().sort_values(ascending=False)
+        keep = list(total_by_entity.head(top_n).index)
+        return df[df[group_by].isin(keep)]
+    return df
+def kpi_summary(df_filtered, df_metric):
+    total_spend = df_metric["NetAmount"].sum()
+    total_pos = len(df_metric)
+    suppliers = df_metric["Supplier"].nunique()
+    off_ratio = df_metric["OffContract"].mean()
+    return {
+        "total_spend": total_spend,
+        "lines": total_pos,
+        "suppliers": suppliers,
+        "off_ratio": off_ratio
+    }
+def insight_sentence(parsed, kpis, group_by):
+    metric_name = {
+        "po_value": "PO value",
+        "off_contract": "Off-contract spend",
+        "service_spend": "Service PO spend"
+    }[parsed["metric"]]
+    date_str = f'{parsed["start"].isoformat()} to {parsed["end"].isoformat()}'
+    base = f"{metric_name} from {date_str}"
+    details = f"{kpis['suppliers']} suppliers across {kpis['lines']} line items; off-contract ratio {kpis['off_ratio']:.1%}."
+    if group_by:
+        return f"{base}. Grouped by {group_by}. {details}"
+    return f"{base}. {details}"
+# =========================
+# UI: Sidebar Controls
+# =========================
+with st.sidebar:
+    st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=140)
+    st.markdown("## 🧭 Procurement Insight Agent")
+    st.markdown("Elegant Streamlit demo with synthetic procurement data.")
+    st.markdown("---")
+    seed = st.slider("Random seed", 1, 9999, 42)
+    rows = st.select_slider("Dataset size", options=[10_000, 20_000, 40_000, 80_000, 120_000], value=40_000)
+    st.caption("Higher rows = richer charts, slower compute.")
+    st.markdown("---")
+    default_prompt = "Top 5 suppliers by PO value this year by month"
+    user_prompt = st.text_area("Ask a question", value=default_prompt, height=96, label_visibility="visible", placeholder="e.g., Off-contract spend by purchasing org last quarter")
+    st.markdown("---")
+    st.caption("Tip: Try prompts like:")
+    st.code("PO value by month in 2025\nTop 5 suppliers this year by month\nOff-contract spend by purchasing org last quarter\nService spend by company in Q2 2024", language="text")
+# Regenerate data if settings changed
+if (seed != 42) or (rows != 40_000):
+    df_raw = generate_synthetic_procurement(seed=seed, rows=rows)
+# =========================
+# Header
+# =========================
+st.markdown("# 🧠 Procurement Insight Agent")
+st.markdown(
+    "Turn natural-language questions into procurement insights using a polished Streamlit UI and synthetic S/4HANA-style analytics data."
+)
+# =========================
+# Agent Parse + Compute
+# =========================
+parsed = parse_prompt(user_prompt)
+df_time = filter_timeframe(df_raw, parsed["start"], parsed["end"])
+df_metric = compute_metric(df_time, parsed["metric"])
+kpis = kpi_summary(df_time, df_metric)
+summary_text = insight_sentence(parsed, kpis, parsed["group_by"])
+# =========================
+# KPI Row
+# =========================
+c1, c2, c3, c4 = st.columns(4)
+with c1:
+    st.markdown('<div class="kpi"><h3>Total Spend</h3><div class="value">${:,.0f}</div></div>'.format(kpis["total_spend"]), unsafe_allow_html=True)
+with c2:
+    st.markdown('<div class="kpi"><h3>Line Items</h3><div class="value">{:,}</div></div>'.format(kpis["lines"]), unsafe_allow_html=True)
+with c3:
+    st.markdown('<div class="kpi"><h3>Suppliers</h3><div class="value">{:,}</div></div>'.format(kpis["suppliers"]), unsafe_allow_html=True)
+with c4:
+    st.markdown('<div class="kpi"><h3>Off-Contract Ratio</h3><div class="value">{:.1%}</div></div>'.format(kpis["off_ratio"]), unsafe_allow_html=True)
+st.markdown(f"#### {summary_text}")
+# =========================
+# Main Tabs
+# =========================
+tab_trend, tab_breakdown, tab_table, tab_agent = st.tabs(["Trend & Composition", "Breakdowns & Drilldowns", "Data Table", "Agent Plan"])
+# --- Trend & Composition ---
+with tab_trend:
+    agg, time_col = group_and_aggregate(df_metric, parsed["grain"], parsed["group_by"])
+    agg_top = topn_if_needed(agg, parsed["top_n"], parsed["group_by"], time_col)
+    if parsed["group_by"]:
+        fig = px.line(
+            agg_top.sort_values(time_col),
+            x=time_col, y="TotalAmount", color=parsed["group_by"],
+            markers=True, line_shape="spline",
+            title="Trend"
+        )
+    else:
+        fig = px.bar(
+            agg_top.sort_values(time_col),
+            x=time_col, y="TotalAmount", title="Trend"
+        )
+    fig.update_layout(
+        margin=dict(l=10, r=10, t=50, b=10),
+        height=420,
+        template="plotly_dark",
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
+    )
+    st.plotly_chart(fig, use_container_width=True)
+    # Composition pie for current period
+    comp_group = parsed["group_by"] or "Supplier"
+    comp = df_metric.groupby(comp_group)["NetAmount"].sum().reset_index().sort_values("NetAmount", ascending=False).head(10)
+    fig2 = px.pie(comp, values="NetAmount", names=comp_group, title=f"Composition by {comp_group} (Top 10)")
+    fig2.update_layout(template="plotly_dark", height=420)
+    st.plotly_chart(fig2, use_container_width=True)
+# --- Breakdowns & Drilldowns ---
+with tab_breakdown:
+    st.markdown("##### Drilldown Controls")
+    col_a, col_b, col_c = st.columns(3)
+    with col_a:
+        dim1 = st.selectbox("Primary dimension", ["Supplier", "PurchasingOrganization", "CompanyCode", "MaterialGroup"], index=0)
+    with col_b:
+        dim2 = st.selectbox("Secondary dimension (optional)", ["None", "PurchasingOrganization", "CompanyCode", "MaterialGroup", "Supplier"], index=1)
+        dim2 = None if dim2 == "None" else dim2
+    with col_c:
+        n = st.slider("Top N", 5, 25, 10, step=5)
+    # Calculate totals
+    if dim2:
+        gb = df_metric.groupby([dim1, dim2])["NetAmount"].sum().reset_index().rename(columns={"NetAmount":"TotalAmount"})
+        top_entities = gb.groupby(dim1)["TotalAmount"].sum().sort_values(ascending=False).head(n).index
+        gb = gb[gb[dim1].isin(top_entities)]
+        fig3 = px.bar(gb, x="TotalAmount", y=dim1, color=dim2, orientation="h", title=f"Top {n} by {dim1} with {dim2}")
+    else:
+        gb = df_metric.groupby(dim1)["NetAmount"].sum().reset_index().rename(columns={"NetAmount":"TotalAmount"})
+        gb = gb.sort_values("TotalAmount", ascending=False).head(n)
+        fig3 = px.bar(gb, x="TotalAmount", y=dim1, orientation="h", title=f"Top {n} by {dim1}")
+    fig3.update_layout(template="plotly_dark", height=520, margin=dict(l=10, r=10, t=50, b=10))
+    st.plotly_chart(fig3, use_container_width=True)
+    st.markdown("##### Cohort Trend")
+    cohort_value = st.selectbox(f"Pick a {dim1} cohort for trend", options=sorted(df_metric[dim1].unique().tolist())[:250])
+    cohort = df_metric[df_metric[dim1] == cohort_value]
+    cohort_trend = cohort.groupby(cohort["CalendarMonth"].dt.to_period("M").astype(str))["NetAmount"].sum().reset_index()
+    cohort_trend.columns = ["Month", "TotalAmount"]
+    fig4 = px.area(cohort_trend, x="Month", y="TotalAmount", title=f"Trend for {dim1}: {cohort_value}")
+    fig4.update_layout(template="plotly_dark", height=380)
+    st.plotly_chart(fig4, use_container_width=True)
+# --- Data Table ---
+with tab_table:
+    st.markdown("##### Result Table")
+    # Prepare final display table aligned with parsed settings
+    final = agg_top.copy()
+    # Beautify column order
+    cols = [c for c in ["Year", "Quarter", "Month", parsed.get("group_by"), "TotalAmount"] if c in final.columns]
+    final = final[cols]
+    st.dataframe(final, use_container_width=True, hide_index=True)
+    st.download_button("Download CSV", data=final.to_csv(index=False).encode("utf-8"), file_name="procurement_results.csv", mime="text/csv")
+# --- Agent Plan ---
+with tab_agent:
+    st.markdown("##### Agent Interpretation")
+    st.json({
+        "parsed_prompt": parsed,
+        "notes": "Replace synthetic data functions with an OData client to C_* CDS queries in S/4HANA. The rest of the pipeline remains unchanged."
+    })
+st.markdown('<div class="footer-note">UI inspired by enterprise analytics dashboards. Built with Streamlit, Plotly, and synthetic data that mimics S/4HANA Embedded Analytics structures.</div>', unsafe_allow_html=True)