# FinanceDemo / app.py
import streamlit as st
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import shap
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error
st.set_page_config(page_title="Profitability Intelligence", layout="wide", initial_sidebar_state="collapsed")
# Custom CSS for better UI
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
font-weight: 700;
color: #1f77b4;
margin-bottom: 0.5rem;
}
.sub-header {
font-size: 1.1rem;
color: #666;
margin-bottom: 2rem;
}
.insight-box {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 1.5rem;
border-radius: 10px;
color: white;
margin: 1rem 0;
}
.metric-card {
background: white;
padding: 1.5rem;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
border-left: 4px solid #1f77b4;
}
.recommendation-card {
background: #f0f9ff;
padding: 1rem;
border-radius: 8px;
border-left: 4px solid #22c55e;
margin: 0.5rem 0;
}
.warning-card {
background: #fef3c7;
padding: 1rem;
border-radius: 8px;
border-left: 4px solid #f59e0b;
margin: 0.5rem 0;
}
</style>
""", unsafe_allow_html=True)
# -----------------------------
# Data Generation (Hidden from UI)
# -----------------------------
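# Synthetic transaction generator: for each of `days` days it draws `rows_per_day` rows across
# product / region / channel segments. Every segment gets a fixed negative price-sensitivity
# coefficient, daily volume is Poisson(8 * weekend bump * annual sine macro factor * price-response
# multiplier), and list price, discount and unit cost are drawn around segment-specific baselines.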
@st.cache_data(show_spinner=False)
def generate_synthetic_data(days=90, seed=42, rows_per_day=800):
rng = np.random.default_rng(seed)
start_date = datetime.today().date() - timedelta(days=days)
dates = pd.date_range(start_date, periods=days, freq="D")
products = ["Premium Widget", "Standard Widget", "Economy Widget", "Deluxe Widget"]
regions = ["North America", "Europe", "Asia Pacific"]
channels = ["Direct Sales", "Distribution Partners", "E-Commerce"]
base_price = {"Premium Widget": 120, "Standard Widget": 135, "Economy Widget": 110, "Deluxe Widget": 150}
base_cost = {"Premium Widget": 70, "Standard Widget": 88, "Economy Widget": 60, "Deluxe Widget": 95}
region_price_bump = {"North America": 1.00, "Europe": 1.03, "Asia Pacific": 0.97}
region_cost_bump = {"North America": 1.00, "Europe": 1.02, "Asia Pacific": 1.01}
channel_discount_mean = {"Direct Sales": 0.06, "Distribution Partners": 0.12, "E-Commerce": 0.04}
channel_discount_std = {"Direct Sales": 0.02, "Distribution Partners": 0.03, "E-Commerce": 0.02}
seg_epsilon = {}
for p in products:
for r in regions:
for c in channels:
base_eps = rng.uniform(-0.9, -0.25)
if c == "Distribution Partners":
base_eps -= rng.uniform(0.1, 0.3)
if c == "E-Commerce":
base_eps += rng.uniform(0.05, 0.15)
seg_epsilon[(p, r, c)] = base_eps
records = []
for d in dates:
dow = d.weekday()
dow_mult = 1.0 + (0.06 if dow in (5, 6) else 0)
macro = 1.0 + 0.03*np.sin((d.toordinal()%365)/365*2*np.pi)
n = rows_per_day
prod = rng.choice(products, size=n, p=[0.35, 0.3, 0.2, 0.15])
reg = rng.choice(regions, size=n, p=[0.4, 0.35, 0.25])
ch = rng.choice(channels, size=n, p=[0.45, 0.35, 0.20])
base_p = np.array([base_price[x] for x in prod]) * np.array([region_price_bump[x] for x in reg])
base_c = np.array([base_cost[x] for x in prod]) * np.array([region_cost_bump[x] for x in reg])
discount = np.clip(
np.array([channel_discount_mean[x] for x in ch]) +
rng.normal(0, [channel_discount_std[x] for x in ch]), 0, 0.45
)
list_price = rng.normal(base_p, 5)
net_price = np.clip(list_price * (1 - discount), 20, None)
unit_cost = np.clip(rng.normal(base_c, 4), 10, None)
eps = np.array([seg_epsilon[(pp, rr, cc)] for pp, rr, cc in zip(prod, reg, ch)])
ref_price = np.array([base_price[x] for x in prod])
qty_mu = np.exp(eps * (net_price - ref_price) / np.maximum(ref_price, 1e-6))
qty = np.maximum(1, rng.poisson(8 * dow_mult * macro * qty_mu))
revenue = net_price * qty
cogs = unit_cost * qty
gm_val = revenue - cogs
gm_pct = np.where(revenue > 0, gm_val / revenue, 0.0)
for i in range(n):
records.append({
"date": d, "product": prod[i], "region": reg[i], "channel": ch[i],
"list_price": float(list_price[i]), "discount_pct": float(discount[i]),
"net_price": float(net_price[i]), "unit_cost": float(unit_cost[i]),
"qty": int(qty[i]), "revenue": float(revenue[i]), "cogs": float(cogs[i]),
"gm_value": float(gm_val[i]), "gm_pct": float(gm_pct[i]), "dow": dow
})
return pd.DataFrame(records)
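# Feature engineering: alongside the raw transaction fields, add per-unit price/cost aliases and
# 7-day rolling medians of quantity, price and cost within each product/region/channel segment.
# Returns (df, numeric feature names, categorical feature names, target column "gm_pct").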
def build_features(df: pd.DataFrame):
feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
feats_cat = ["product", "region", "channel"]
df = df.sort_values("date").copy()
seg = ["product", "region", "channel"]
df["price_per_unit"] = df["net_price"]
df["cost_per_unit"] = df["unit_cost"]
df["roll7_qty"] = df.groupby(seg)["qty"].transform(lambda s: s.rolling(7, min_periods=1).median())
df["roll7_price"] = df.groupby(seg)["price_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
df["roll7_cost"] = df.groupby(seg)["cost_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
feats_num += ["price_per_unit", "cost_per_unit", "roll7_qty", "roll7_price", "roll7_cost"]
return df, feats_num, feats_cat, "gm_pct"
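# Model: one-hot encode the categorical segment fields, pass numeric features through, and fit a
# RandomForestRegressor on gross-margin %. The split uses shuffle=False, so the last 25% of the
# time-ordered rows serves as a rough out-of-time test set for the reported R2 / MAE.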
@st.cache_resource(show_spinner=False)
def train_model(df: pd.DataFrame, feats_num, feats_cat, target):
X = df[feats_num + feats_cat]
y = df[target]
pre = ColumnTransformer(
transformers=[
("cat", OneHotEncoder(handle_unknown="ignore"), feats_cat),
("num", "passthrough", feats_num),
]
)
model = RandomForestRegressor(n_estimators=300, max_depth=None, random_state=42, n_jobs=-1, min_samples_leaf=3)
pipe = Pipeline([("pre", pre), ("rf", model)])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)
pipe.fit(X_train, y_train)
pred = pipe.predict(X_test)
return pipe, {"r2": r2_score(y_test, pred), "mae": mean_absolute_error(y_test, pred)}, X_test
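# Explainability: run shap.TreeExplainer on the fitted forest over the preprocessed design matrix
# (sampled down to at most `shap_sample` rows). Returns the per-row SHAP values, the sampled raw
# rows, the expanded feature names, and a side-by-side join of the raw rows with their SHAP values.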
@st.cache_resource(show_spinner=False)
def compute_shap(pipe, X_sample, feats_num, feats_cat, shap_sample=1000, seed=42):
np.random.seed(seed)
preproc = pipe.named_steps["pre"]
rf = pipe.named_steps["rf"]
feature_names = list(preproc.named_transformers_["cat"].get_feature_names_out(feats_cat)) + feats_num
if len(X_sample) > shap_sample:
sample_idx = np.random.choice(len(X_sample), size=shap_sample, replace=False)
X_sample = X_sample.iloc[sample_idx]
X_t = preproc.transform(X_sample)
# Densify if the one-hot step produced a sparse matrix
if hasattr(X_t, "toarray"):
X_t = X_t.toarray()
explainer = shap.TreeExplainer(rf)
shap_values = explainer.shap_values(X_t)
shap_df = pd.DataFrame(shap_values, columns=feature_names)
# Prefix the SHAP columns so they do not collide with raw feature columns of the same name
joined = pd.concat([X_sample.reset_index(drop=True), shap_df.add_prefix("shap_").reset_index(drop=True)], axis=1)
return shap_df, X_sample.reset_index(drop=True), feature_names, joined
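# Segment price elasticity via a simple log-log regression: fit ln(qty) = a + b*ln(net_price) on the
# segment's history and use the slope b as the elasticity estimate. Falls back to a conservative
# -0.5 (flagged as not fitted) when there is too little data or no price/quantity variation.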
def estimate_segment_elasticity(df: pd.DataFrame, product, region, channel):
seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
if len(seg_df) < 100 or seg_df["net_price"].std() < 1e-6 or seg_df["qty"].std() < 1e-6:
return -0.5, False
x = np.log(np.clip(seg_df["net_price"].values, 1e-6, None)).reshape(-1,1)
y = np.log(np.clip(seg_df["qty"].values, 1e-6, None))
lin = LinearRegression().fit(x, y)
return float(lin.coef_[0]), True
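# What-if simulation on the segment's most recent transaction: apply the discount / unit-cost
# deltas, move quantity along a constant-elasticity demand curve q1 = q0 * (p1/p0)**elasticity,
# and report baseline vs. new price, cost, quantity, margin % and the gross-margin value delta.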
def simulate_action(segment_df: pd.DataFrame, elasticity, delta_discount=0.0, delta_unit_cost=0.0):
if segment_df.empty:
return None
base = segment_df.iloc[-1]
p0, c0, q0, d0 = base["net_price"], base["unit_cost"], base["qty"], base["discount_pct"]
new_discount = np.clip(d0 + delta_discount, 0.0, 0.45)
p1 = max(0.01, base["list_price"] * (1 - new_discount))
c1 = max(0.01, c0 + delta_unit_cost)
q1 = max(0.0, q0 * (p1 / p0) ** elasticity) if p0 > 0 else q0
rev0, cogs0 = p0 * q0, c0 * q0
rev1, cogs1 = p1 * q1, c1 * q1
return {
"baseline_price": p0, "new_price": p1, "baseline_cost": c0, "new_cost": c1,
"baseline_qty": q0, "new_qty": q1, "gm_delta_value": (rev1 - cogs1) - (rev0 - cogs0),
"gm0_pct": (rev0 - cogs0)/rev0 if rev0>0 else 0.0,
"gm1_pct": (rev1 - cogs1)/rev1 if rev1>0 else 0.0,
"new_discount": new_discount
}
# -----------------------------
# Initialize Data
# -----------------------------
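# One-time setup per session: generate the data, engineer features, train the model and compute
# SHAP values, then cache everything in st.session_state so subsequent reruns are instant.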
if "data_loaded" not in st.session_state:
with st.spinner("🔄 Loading SAP data and building intelligence models..."):
df = generate_synthetic_data(days=90, seed=42, rows_per_day=800)
df_feat, feats_num, feats_cat, target = build_features(df)
pipe, metrics, X_test = train_model(df_feat, feats_num, feats_cat, target)
shap_df, X_test_sample, feature_names, joined = compute_shap(pipe, X_test, feats_num, feats_cat)
st.session_state["df"] = df
st.session_state["df_feat"] = df_feat
st.session_state["pipe"] = pipe
st.session_state["metrics"] = metrics
st.session_state["shap_df"] = shap_df
st.session_state["joined"] = joined
st.session_state["feats_num"] = feats_num
st.session_state["feats_cat"] = feats_cat
st.session_state["data_loaded"] = True
df = st.session_state["df"]
joined = st.session_state["joined"]
metrics = st.session_state["metrics"]
shap_df = st.session_state["shap_df"]
# -----------------------------
# HEADER
# -----------------------------
st.markdown('<p class="main-header">💰 Profitability Intelligence Dashboard</p>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">AI-powered insights to understand and optimize your gross margin drivers</p>', unsafe_allow_html=True)
# -----------------------------
# EXECUTIVE SUMMARY
# -----------------------------
st.markdown("## πŸ“Š Executive Summary")
daily = df.groupby("date").agg(revenue=("revenue","sum"), cogs=("cogs","sum"), gm_value=("gm_value","sum")).reset_index()
daily["gm_pct"] = np.where(daily["revenue"]>0, daily["gm_value"]/daily["revenue"], 0.0)
today_row = daily.iloc[-1]
yesterday_row = daily.iloc[-2] if len(daily) > 1 else today_row
roll7 = daily["gm_pct"].tail(7).mean()
roll30 = daily["gm_pct"].tail(30).mean()
col1, col2, col3, col4 = st.columns(4)
with col1:
delta = today_row["gm_pct"] - yesterday_row["gm_pct"]
st.metric("Today's Gross Margin %", f"{today_row['gm_pct']*100:.1f}%",
f"{delta*100:+.1f}% vs yesterday")
with col2:
st.metric("Revenue (Today)", f"${today_row['revenue']/1e6:.2f}M")
with col3:
trend = "↗️" if roll7 > roll30 else "↘️"
st.metric("7-Day Avg GM%", f"{roll7*100:.1f}%", f"{trend} vs 30-day avg")
with col4:
st.metric("Gross Profit (Today)", f"${today_row['gm_value']/1e6:.2f}M")
# Trend chart
fig_trend = go.Figure()
fig_trend.add_trace(go.Scatter(x=daily["date"], y=daily["gm_pct"]*100,
mode='lines', name='Daily GM%', line=dict(color='#1f77b4', width=2)))
fig_trend.add_trace(go.Scatter(x=daily["date"], y=daily["gm_pct"].rolling(7).mean()*100,
mode='lines', name='7-Day Average', line=dict(color='#ff7f0e', width=2, dash='dash')))
fig_trend.update_layout(title="Gross Margin % Trend", xaxis_title="Date", yaxis_title="GM %",
height=300, hovermode='x unified')
st.plotly_chart(fig_trend, use_container_width=True)
# Key Insight Box
gm_change = (today_row["gm_pct"] - roll30) * 100
if abs(gm_change) > 0.5:
trend_word = "improved" if gm_change > 0 else "declined"
st.markdown(f"""
<div class="insight-box">
<h3>💡 Key Insight</h3>
<p>Your gross margin has <strong>{trend_word} by {abs(gm_change):.1f} percentage points</strong> compared to the 30-day average.
The analysis below identifies the specific drivers and business segments responsible for this change.</p>
</div>
""", unsafe_allow_html=True)
st.markdown("---")
# -----------------------------
# DRIVER ANALYSIS
# -----------------------------
st.markdown("## πŸ” What's Driving Your Profitability?")
st.markdown("""
Our AI model has analyzed thousands of transactions to identify which factors have the biggest impact on your gross margin.
Think of this as understanding which levers you can pull to improve profitability.
""")
# Calculate driver importance
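# Global driver importance = mean absolute SHAP value per feature across the sampled transactions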
mean_abs = shap_df.abs().mean().sort_values(ascending=False)
# Simplify feature names for business users
def simplify_feature_name(name):
if "discount" in name.lower():
return "Discount Level"
elif "cost_per_unit" in name.lower() or "unit_cost" in name.lower():
return "Unit Cost"
elif "price_per_unit" in name.lower() or "net_price" in name.lower():
return "Selling Price"
elif "qty" in name.lower():
return "Volume"
elif "product_" in name.lower():
return name.replace("product_", "Product: ")
elif "channel_" in name.lower():
return name.replace("channel_", "Channel: ")
elif "region_" in name.lower():
return name.replace("region_", "Region: ")
return name
# Top 10 drivers
top_drivers = mean_abs.head(10)
driver_names = [simplify_feature_name(f) for f in top_drivers.index]
fig_drivers = go.Figure(go.Bar(
y=driver_names[::-1],
x=top_drivers.values[::-1],
orientation='h',
marker=dict(color=top_drivers.values[::-1], colorscale='Blues', showscale=False)
))
fig_drivers.update_layout(
title="Top 10 Profit Margin Drivers (Impact Strength)",
xaxis_title="Impact on Gross Margin",
yaxis_title="",
height=400,
showlegend=False
)
st.plotly_chart(fig_drivers, use_container_width=True)
# Business interpretation
st.markdown("""
**What does this mean?**
- **Higher bars** = Bigger impact on your gross margin
- Focus your attention on the top 3-5 drivers for maximum profitability improvement
""")
st.markdown("---")
# -----------------------------
# SEGMENT PERFORMANCE
# -----------------------------
st.markdown("## πŸ“ Performance by Business Segment")
st.markdown("Not all business segments perform equally. Here's where you're winning and where there's opportunity:")
# Calculate segment performance
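# For each product/region/channel segment, average the SHAP contributions of the pricing, cost and
# discount features and sum them: strongly negative totals flag segments dragging margin down,
# strongly positive totals flag segments lifting it.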
key_feats = [c for c in joined.columns if c.startswith("shap_") and any(k in c for k in ["discount", "price_per_unit", "cost_per_unit", "unit_cost", "net_price"])]
grp = joined.groupby(["product","region","channel"]).mean(numeric_only=True)
rank_cols = [c for c in grp.columns if c in key_feats]
segment_impact = grp[rank_cols].sum(axis=1).sort_values()
col1, col2 = st.columns(2)
with col1:
st.markdown("### πŸ”΄ Segments Dragging Margin Down")
worst = segment_impact.head(8)
worst_df = pd.DataFrame({
'Segment': [f"{p} • {r} • {c}" for p, r, c in worst.index],
'Margin Impact': worst.values
})
worst_df['Impact Score'] = worst_df['Margin Impact'].apply(lambda x: '🔴' * min(5, int(abs(x)*10)))
st.dataframe(worst_df[['Segment', 'Impact Score']], hide_index=True, use_container_width=True)
with col2:
st.markdown("### 🟒 Segments Lifting Margin Up")
best = segment_impact.tail(8).sort_values(ascending=False)
best_df = pd.DataFrame({
'Segment': [f"{p} • {r} • {c}" for p, r, c in best.index],
'Margin Impact': best.values
})
best_df['Performance'] = best_df['Margin Impact'].apply(lambda x: '🟢' * min(5, max(1, int(x*10))))
st.dataframe(best_df[['Segment', 'Performance']], hide_index=True, use_container_width=True)
st.markdown("---")
# -----------------------------
# WHAT-IF SIMULATOR
# -----------------------------
st.markdown("## 🎯 What-If Simulator: Test Your Strategies")
st.markdown("""
Use this simulator to model the financial impact of potential pricing or cost optimization strategies.
Select a segment and adjust the levers to see the projected outcome.
""")
# Segment selector
last_day = df["date"].max()
seg_today = df[df["date"]==last_day][["product","region","channel"]].drop_duplicates().sort_values(["product","region","channel"])
seg_options = seg_today.apply(lambda r: f"{r['product']} • {r['region']} • {r['channel']}", axis=1).tolist()
col1, col2 = st.columns([2, 1])
with col1:
selected_segment = st.selectbox("**Select Business Segment:**", seg_options, key="segment_selector")
with col2:
st.markdown("**Scenario Type:**")
scenario = st.radio("Scenario Type", ["Optimize Discount", "Reduce Costs", "Custom"], horizontal=True, label_visibility="collapsed")
prod_sel, reg_sel, ch_sel = [s.strip() for s in selected_segment.split("•")]
seg_hist = df[(df["product"]==prod_sel)&(df["region"]==reg_sel)&(df["channel"]==ch_sel)].sort_values("date")
elasticity, _ = estimate_segment_elasticity(seg_hist, prod_sel, reg_sel, ch_sel)
# Pre-set scenarios
if scenario == "Optimize Discount":
delta_disc = -2.0
delta_cost = 0.0
st.info("📉 Testing a 2 percentage point discount reduction to improve margin")
elif scenario == "Reduce Costs":
delta_disc = 0.0
delta_cost = -3.0
st.info("💰 Testing a $3 reduction in unit cost through operational efficiency")
else:
col1, col2 = st.columns(2)
with col1:
delta_disc = st.slider("Adjust Discount (percentage points)", -10.0, 10.0, -2.0, 0.5,
help="Negative = tighter discount, Positive = deeper discount")
with col2:
delta_cost = st.slider("Adjust Unit Cost ($)", -10.0, 10.0, 0.0, 0.5,
help="Negative = cost reduction, Positive = cost increase")
# Run simulation
sim_res = simulate_action(seg_hist, elasticity, delta_discount=delta_disc/100.0, delta_unit_cost=delta_cost)
if sim_res:
st.markdown("### πŸ“ˆ Projected Impact")
# Results in clean columns
metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)
with metric_col1:
price_change = ((sim_res['new_price'] - sim_res['baseline_price']) / sim_res['baseline_price']) * 100
st.metric("Price per Unit", f"${sim_res['new_price']:.2f}", f"{price_change:+.1f}%")
with metric_col2:
cost_change = ((sim_res['new_cost'] - sim_res['baseline_cost']) / sim_res['baseline_cost']) * 100
st.metric("Cost per Unit", f"${sim_res['new_cost']:.2f}", f"{cost_change:+.1f}%")
with metric_col3:
qty_change = ((sim_res['new_qty'] - sim_res['baseline_qty']) / sim_res['baseline_qty']) * 100
st.metric("Volume", f"{sim_res['new_qty']:.0f} units", f"{qty_change:+.1f}%")
with metric_col4:
gm_change = (sim_res['gm1_pct'] - sim_res['gm0_pct']) * 100
st.metric("Gross Margin %", f"{sim_res['gm1_pct']*100:.1f}%", f"{gm_change:+.1f} pts")
# Financial impact
if sim_res['gm_delta_value'] > 0:
st.markdown(f"""
<div class="recommendation-card">
<h4>✅ Positive Impact: +${sim_res['gm_delta_value']:.2f} in daily gross profit</h4>
<p>This strategy would <strong>improve profitability</strong> for this segment.
The estimated price elasticity of {elasticity:.2f} means volume {('declines' if elasticity < 0 and delta_disc < 0 else 'adjusts')}
as the net price changes, but the margin improvement outweighs the volume impact.</p>
</div>
""", unsafe_allow_html=True)
else:
st.markdown(f"""
<div class="warning-card">
<h4>⚠️ Negative Impact: ${sim_res['gm_delta_value']:.2f} in daily gross profit</h4>
<p>This strategy would <strong>reduce profitability</strong> for this segment.
Consider alternative approaches or test smaller adjustments.</p>
</div>
""", unsafe_allow_html=True)
st.markdown("---")
# -----------------------------
# AI RECOMMENDATIONS
# -----------------------------
st.markdown("## πŸ’‘ AI-Powered Recommendations")
st.markdown("""
Based on the analysis of all segments, here are the top opportunities to improve profitability.
These recommendations are ranked by expected financial impact.
""")
# Generate recommendations
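# Recommendation heuristic: take the 20 segments with the most negative summed driver impact,
# propose tightening their discount by 0.5-3 percentage points (scaled by impact magnitude),
# re-simulate with the segment's estimated elasticity, and keep only actions with a positive
# gross-margin uplift.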
worst_keys = segment_impact.head(20).index.tolist()
recs = []
for p, r, c in worst_keys:
hist = df[(df["product"]==p)&(df["region"]==r)&(df["channel"]==c)].sort_values("date")
if hist.empty:
continue
eps, _ = estimate_segment_elasticity(hist, p, r, c)
prop_disc_pts = -np.clip(abs(segment_impact[(p,r,c)])*10, 0.5, 3.0)
sim = simulate_action(hist, eps, delta_discount=prop_disc_pts/100.0, delta_unit_cost=0.0)
if sim and sim["gm_delta_value"] > 0:
recs.append({
"Segment": f"{p} β€’ {r} β€’ {c}",
"Recommended Action": f"Reduce discount by {abs(prop_disc_pts):.1f}%",
"Expected Daily Uplift": f"${sim['gm_delta_value']:.2f}",
"New Margin %": f"{sim['gm1_pct']*100:.1f}%",
"Risk Level": "Low" if abs(eps) < 0.5 else "Medium"
})
rec_df = pd.DataFrame(recs)
if not rec_df.empty:
# Rank by the numeric uplift rather than the formatted "$x.xx" string
rec_df["_uplift"] = rec_df["Expected Daily Uplift"].str.replace("$", "", regex=False).astype(float)
rec_df = rec_df.sort_values("_uplift", ascending=False).head(10)
total_potential = rec_df["_uplift"].sum()
rec_df = rec_df.drop(columns="_uplift")
st.dataframe(rec_df, hide_index=True, use_container_width=True)
st.success(f"🎯 **Total Daily Profit Opportunity: ${total_potential:.2f}** | Annualized: ${total_potential * 365:,.0f}")
# Download button
csv = rec_df.to_csv(index=False).encode('utf-8')
st.download_button(
label="📥 Download Full Recommendations (CSV)",
data=csv,
file_name=f"profitability_recommendations_{datetime.now().strftime('%Y%m%d')}.csv",
mime="text/csv"
)
else:
st.info("No high-confidence recommendations available at this time. Current segment performance is well-optimized.")
st.markdown("---")
# Footer
st.caption("🔒 **Demo Environment** | Data shown is synthetic for demonstration. Connect to your SAP system for live insights.")
st.caption(f"Model Performance: R² = {metrics['r2']:.3f} | Analyzing {len(df):,} transactions across {len(df['product'].unique())} products")