PD03 committed on
Commit
4a5ff6e
Β·
verified Β·
1 Parent(s): fc95108

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +604 -386
app.py CHANGED
@@ -3,9 +3,9 @@ import numpy as np
3
  import pandas as pd
4
  import plotly.express as px
5
  import plotly.graph_objects as go
 
6
  import shap
7
  import matplotlib.pyplot as plt
8
-
9
  from datetime import datetime, timedelta
10
  from sklearn.model_selection import train_test_split
11
  from sklearn.compose import ColumnTransformer
@@ -14,75 +14,89 @@ from sklearn.pipeline import Pipeline
14
  from sklearn.ensemble import RandomForestRegressor
15
  from sklearn.linear_model import LinearRegression
16
  from sklearn.metrics import r2_score, mean_absolute_error
 
 
 
 
 
 
 
 
 
 
17
 
18
- st.set_page_config(page_title="Profitability Intelligence", layout="wide", initial_sidebar_state="collapsed")
19
-
20
- # Custom CSS for better UI
21
  st.markdown("""
22
  <style>
23
  .main-header {
24
- font-size: 2.5rem;
25
  font-weight: 700;
26
  color: #1f77b4;
 
27
  margin-bottom: 0.5rem;
 
28
  }
29
  .sub-header {
30
- font-size: 1.1rem;
31
  color: #666;
 
32
  margin-bottom: 2rem;
33
  }
34
  .insight-box {
35
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
 
36
  padding: 1.5rem;
37
- border-radius: 10px;
38
- color: white;
39
  margin: 1rem 0;
 
 
40
  }
41
- .metric-card {
42
  background: white;
 
 
43
  padding: 1.5rem;
44
- border-radius: 8px;
45
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
46
- border-left: 4px solid #1f77b4;
47
  }
48
- .recommendation-card {
49
- background: #f0f9ff;
50
- padding: 1rem;
51
- border-radius: 8px;
52
- border-left: 4px solid #22c55e;
53
- margin: 0.5rem 0;
54
  }
55
- .warning-card {
56
- background: #fef3c7;
57
- padding: 1rem;
58
- border-radius: 8px;
59
- border-left: 4px solid #f59e0b;
60
- margin: 0.5rem 0;
 
 
 
 
 
 
61
  }
62
  </style>
63
  """, unsafe_allow_html=True)
64
 
65
  # -----------------------------
66
- # Data Generation (Hidden from UI)
67
  # -----------------------------
68
  @st.cache_data(show_spinner=False)
69
- def generate_synthetic_data(days=90, seed=42, rows_per_day=800):
70
  rng = np.random.default_rng(seed)
71
  start_date = datetime.today().date() - timedelta(days=days)
72
  dates = pd.date_range(start_date, periods=days, freq="D")
73
-
74
  products = ["Premium Widget", "Standard Widget", "Economy Widget", "Deluxe Widget"]
75
- regions = ["North America", "Europe", "Asia Pacific"]
76
  channels = ["Direct Sales", "Distribution Partners", "E-Commerce"]
77
 
78
  base_price = {"Premium Widget": 120, "Standard Widget": 135, "Economy Widget": 110, "Deluxe Widget": 150}
79
- base_cost = {"Premium Widget": 70, "Standard Widget": 88, "Economy Widget": 60, "Deluxe Widget": 95}
80
-
81
- region_price_bump = {"North America": 1.00, "Europe": 1.03, "Asia Pacific": 0.97}
82
- region_cost_bump = {"North America": 1.00, "Europe": 1.02, "Asia Pacific": 1.01}
83
-
84
  channel_discount_mean = {"Direct Sales": 0.06, "Distribution Partners": 0.12, "E-Commerce": 0.04}
85
- channel_discount_std = {"Direct Sales": 0.02, "Distribution Partners": 0.03, "E-Commerce": 0.02}
86
 
87
  seg_epsilon = {}
88
  for p in products:
@@ -103,16 +117,17 @@ def generate_synthetic_data(days=90, seed=42, rows_per_day=800):
103
 
104
  n = rows_per_day
105
  prod = rng.choice(products, size=n, p=[0.35, 0.3, 0.2, 0.15])
106
- reg = rng.choice(regions, size=n, p=[0.4, 0.35, 0.25])
107
- ch = rng.choice(channels, size=n, p=[0.45, 0.35, 0.20])
108
 
109
  base_p = np.array([base_price[x] for x in prod]) * np.array([region_price_bump[x] for x in reg])
110
- base_c = np.array([base_cost[x] for x in prod]) * np.array([region_cost_bump[x] for x in reg])
111
 
112
  discount = np.clip(
113
  np.array([channel_discount_mean[x] for x in ch]) +
114
  rng.normal(0, [channel_discount_std[x] for x in ch]), 0, 0.45
115
  )
 
116
  list_price = rng.normal(base_p, 5)
117
  net_price = np.clip(list_price * (1 - discount), 20, None)
118
  unit_cost = np.clip(rng.normal(base_c, 4), 10, None)
@@ -123,433 +138,636 @@ def generate_synthetic_data(days=90, seed=42, rows_per_day=800):
123
  qty = np.maximum(1, rng.poisson(8 * dow_mult * macro * qty_mu))
124
 
125
  revenue = net_price * qty
126
- cogs = unit_cost * qty
127
- gm_val = revenue - cogs
128
- gm_pct = np.where(revenue > 0, gm_val / revenue, 0.0)
129
 
130
  for i in range(n):
131
  records.append({
132
- "date": d, "product": prod[i], "region": reg[i], "channel": ch[i],
133
- "list_price": float(list_price[i]), "discount_pct": float(discount[i]),
134
- "net_price": float(net_price[i]), "unit_cost": float(unit_cost[i]),
135
- "qty": int(qty[i]), "revenue": float(revenue[i]), "cogs": float(cogs[i]),
136
- "gm_value": float(gm_val[i]), "gm_pct": float(gm_pct[i]), "dow": dow
 
 
 
 
 
 
 
 
 
137
  })
138
- return pd.DataFrame(records)
139
 
140
- def build_features(df: pd.DataFrame):
 
 
 
 
 
141
  feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
142
  feats_cat = ["product", "region", "channel"]
143
-
144
  df = df.sort_values("date").copy()
145
  seg = ["product", "region", "channel"]
146
  df["price_per_unit"] = df["net_price"]
147
- df["cost_per_unit"] = df["unit_cost"]
148
-
149
  df["roll7_qty"] = df.groupby(seg)["qty"].transform(lambda s: s.rolling(7, min_periods=1).median())
150
  df["roll7_price"] = df.groupby(seg)["price_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
151
- df["roll7_cost"] = df.groupby(seg)["cost_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
152
-
153
  feats_num += ["price_per_unit", "cost_per_unit", "roll7_qty", "roll7_price", "roll7_cost"]
154
- return df, feats_num, feats_cat, "gm_pct"
 
155
 
156
  @st.cache_resource(show_spinner=False)
157
- def train_model(df: pd.DataFrame, feats_num, feats_cat, target):
158
- X = df[feats_num + feats_cat]
159
- y = df[target]
160
-
161
  pre = ColumnTransformer(
162
  transformers=[
163
  ("cat", OneHotEncoder(handle_unknown="ignore"), feats_cat),
164
  ("num", "passthrough", feats_num),
165
  ]
166
  )
167
- model = RandomForestRegressor(n_estimators=300, max_depth=None, random_state=42, n_jobs=-1, min_samples_leaf=3)
168
  pipe = Pipeline([("pre", pre), ("rf", model)])
169
-
170
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)
171
  pipe.fit(X_train, y_train)
172
  pred = pipe.predict(X_test)
 
 
 
173
 
174
- return pipe, {"r2": r2_score(y_test, pred), "mae": mean_absolute_error(y_test, pred)}, X_test
 
 
 
 
 
175
 
176
- @st.cache_resource(show_spinner=False)
177
- def compute_shap(_pipe, X_sample, feats_num, feats_cat, shap_sample=1000, seed=42):
178
- np.random.seed(seed)
179
- preproc = _pipe.named_steps["pre"]
180
- rf = _pipe.named_steps["rf"]
181
- feature_names = list(preproc.named_transformers_["cat"].get_feature_names_out(feats_cat)) + feats_num
182
 
183
- if len(X_sample) > shap_sample:
184
- sample_idx = np.random.choice(len(X_sample), size=shap_sample, replace=False)
185
- X_sample = X_sample.iloc[sample_idx]
 
186
 
187
- X_t = preproc.transform(X_sample)
188
- try:
189
- X_t = X_t.toarray()
190
- except:
191
- pass
 
 
 
 
 
192
 
193
- explainer = shap.TreeExplainer(rf)
194
- shap_values = explainer.shap_values(X_t)
195
- shap_df = pd.DataFrame(shap_values, columns=feature_names)
196
-
197
- joined = pd.concat([X_sample.reset_index(drop=True), shap_df.reset_index(drop=True)], axis=1)
198
- return shap_df, X_sample.reset_index(drop=True), feature_names, joined
199
 
200
- def estimate_segment_elasticity(df: pd.DataFrame, product, region, channel):
201
  seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
202
  if len(seg_df) < 100 or seg_df["net_price"].std() < 1e-6 or seg_df["qty"].std() < 1e-6:
203
  return -0.5, False
204
- x = np.log(np.clip(seg_df["net_price"].values, 1e-6, None)).reshape(-1,1)
205
- y = np.log(np.clip(seg_df["qty"].values, 1e-6, None))
206
- lin = LinearRegression().fit(x, y)
207
- return float(lin.coef_[0]), True
 
 
 
208
 
209
- def simulate_action(segment_df: pd.DataFrame, elasticity, delta_discount=0.0, delta_unit_cost=0.0):
210
  if segment_df.empty:
211
  return None
212
- base = segment_df.iloc[-1]
213
- p0, c0, q0, d0 = base["net_price"], base["unit_cost"], base["qty"], base["discount_pct"]
214
-
215
- new_discount = np.clip(d0 + delta_discount, 0.0, 0.45)
216
- p1 = max(0.01, base["list_price"] * (1 - new_discount))
217
- c1 = max(0.01, c0 + delta_unit_cost)
218
- q1 = max(0.0, q0 * (p1 / p0) ** elasticity) if p0 > 0 else q0
219
-
220
- rev0, cogs0 = p0 * q0, c0 * q0
221
- rev1, cogs1 = p1 * q1, c1 * q1
222
-
223
- return {
224
- "baseline_price": p0, "new_price": p1, "baseline_cost": c0, "new_cost": c1,
225
- "baseline_qty": q0, "new_qty": q1, "gm_delta_value": (rev1 - cogs1) - (rev0 - cogs0),
226
- "gm0_pct": (rev0 - cogs0)/rev0 if rev0>0 else 0.0,
227
- "gm1_pct": (rev1 - cogs1)/rev1 if rev1>0 else 0.0,
228
- "new_discount": new_discount
229
- }
230
-
231
- # -----------------------------
232
- # Initialize Data
233
- # -----------------------------
234
- if "data_loaded" not in st.session_state:
235
- with st.spinner("πŸ”„ Loading SAP data and building intelligence models..."):
236
- df = generate_synthetic_data(days=90, seed=42, rows_per_day=800)
237
- df_feat, feats_num, feats_cat, target = build_features(df)
238
- pipe, metrics, X_test = train_model(df_feat, feats_num, feats_cat, target)
239
- shap_df, X_test_sample, feature_names, joined = compute_shap(pipe, X_test, feats_num, feats_cat)
240
-
241
- st.session_state["df"] = df
242
- st.session_state["df_feat"] = df_feat
243
- st.session_state["pipe"] = pipe
244
- st.session_state["metrics"] = metrics
245
- st.session_state["shap_df"] = shap_df
246
- st.session_state["joined"] = joined
247
- st.session_state["feats_num"] = feats_num
248
- st.session_state["feats_cat"] = feats_cat
249
- st.session_state["data_loaded"] = True
250
-
251
- df = st.session_state["df"]
252
- joined = st.session_state["joined"]
253
- metrics = st.session_state["metrics"]
254
- shap_df = st.session_state["shap_df"]
255
-
256
- # -----------------------------
257
- # HEADER
258
- # -----------------------------
259
- st.markdown('<p class="main-header">πŸ’° Profitability Intelligence Dashboard</p>', unsafe_allow_html=True)
260
- st.markdown('<p class="sub-header">AI-powered insights to understand and optimize your gross margin drivers</p>', unsafe_allow_html=True)
261
 
262
  # -----------------------------
263
- # EXECUTIVE SUMMARY
264
  # -----------------------------
265
- st.markdown("## πŸ“Š Executive Summary")
266
 
267
- daily = df.groupby("date").agg(revenue=("revenue","sum"), cogs=("cogs","sum"), gm_value=("gm_value","sum")).reset_index()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  daily["gm_pct"] = np.where(daily["revenue"]>0, daily["gm_value"]/daily["revenue"], 0.0)
 
269
  today_row = daily.iloc[-1]
270
  yesterday_row = daily.iloc[-2] if len(daily) > 1 else today_row
 
271
  roll7 = daily["gm_pct"].tail(7).mean()
272
- roll30 = daily["gm_pct"].tail(30).mean()
 
 
273
 
274
  col1, col2, col3, col4 = st.columns(4)
275
 
276
  with col1:
277
- delta = today_row["gm_pct"] - yesterday_row["gm_pct"]
278
- st.metric("Today's Gross Margin %", f"{today_row['gm_pct']*100:.1f}%",
279
- f"{delta*100:+.1f}% vs yesterday")
 
 
 
 
280
 
281
  with col2:
282
- st.metric("Revenue (Today)", f"${today_row['revenue']/1e6:.2f}M")
 
 
 
 
 
 
283
 
284
  with col3:
285
- trend = "↗️" if roll7 > roll30 else "β†˜οΈ"
286
- st.metric("7-Day Avg GM%", f"{roll7*100:.1f}%", f"{trend} vs 30-day avg")
 
 
 
 
287
 
288
  with col4:
289
- st.metric("Gross Profit (Today)", f"${today_row['gm_value']/1e6:.2f}M")
290
-
291
- # Trend chart
292
- fig_trend = go.Figure()
293
- fig_trend.add_trace(go.Scatter(x=daily["date"], y=daily["gm_pct"]*100,
294
- mode='lines', name='Daily GM%', line=dict(color='#1f77b4', width=2)))
295
- fig_trend.add_trace(go.Scatter(x=daily["date"], y=daily["gm_pct"].rolling(7).mean()*100,
296
- mode='lines', name='7-Day Average', line=dict(color='#ff7f0e', width=2, dash='dash')))
297
- fig_trend.update_layout(title="Gross Margin % Trend", xaxis_title="Date", yaxis_title="GM %",
298
- height=300, hovermode='x unified')
299
- st.plotly_chart(fig_trend, use_container_width=True)
300
-
301
- # Key Insight Box
302
- gm_change = (today_row["gm_pct"] - roll30) * 100
303
- if abs(gm_change) > 0.5:
304
- trend_word = "improved" if gm_change > 0 else "declined"
305
- st.markdown(f"""
306
- <div class="insight-box">
307
- <h3>πŸ’‘ Key Insight</h3>
308
- <p>Your gross margin has <strong>{trend_word} by {abs(gm_change):.1f} percentage points</strong> compared to the 30-day average.
309
- The analysis below identifies the specific drivers and business segments responsible for this change.</p>
310
- </div>
311
- """, unsafe_allow_html=True)
312
 
313
- st.markdown("---")
 
314
 
315
- # -----------------------------
316
- # DRIVER ANALYSIS
317
- # -----------------------------
318
- st.markdown("## πŸ” What's Driving Your Profitability?")
 
319
 
320
- st.markdown("""
321
- Our AI model has analyzed thousands of transactions to identify which factors have the biggest impact on your gross margin.
322
- Think of this as understanding which levers you can pull to improve profitability.
323
- """)
324
-
325
- # Calculate driver importance
326
- mean_abs = shap_df.abs().mean().sort_values(ascending=False)
327
-
328
- # Simplify feature names for business users
329
- def simplify_feature_name(name):
330
- if "discount" in name.lower():
331
- return "Discount Level"
332
- elif "cost_per_unit" in name.lower() or "unit_cost" in name.lower():
333
- return "Unit Cost"
334
- elif "price_per_unit" in name.lower() or "net_price" in name.lower():
335
- return "Selling Price"
336
- elif "qty" in name.lower():
337
- return "Volume"
338
- elif "product_" in name.lower():
339
- return name.replace("product_", "Product: ")
340
- elif "channel_" in name.lower():
341
- return name.replace("channel_", "Channel: ")
342
- elif "region_" in name.lower():
343
- return name.replace("region_", "Region: ")
344
- return name
345
-
346
- # Top 10 drivers
347
- top_drivers = mean_abs.head(10)
348
- driver_names = [simplify_feature_name(f) for f in top_drivers.index]
349
-
350
- fig_drivers = go.Figure(go.Bar(
351
- y=driver_names[::-1],
352
- x=top_drivers.values[::-1],
353
- orientation='h',
354
- marker=dict(color=top_drivers.values[::-1], colorscale='Blues', showscale=False)
355
- ))
356
- fig_drivers.update_layout(
357
- title="Top 10 Profit Margin Drivers (Impact Strength)",
358
- xaxis_title="Impact on Gross Margin",
359
- yaxis_title="",
360
- height=400,
361
- showlegend=False
362
  )
363
- st.plotly_chart(fig_drivers, use_container_width=True)
364
 
365
- # Business interpretation
366
- st.markdown("""
367
- **What does this mean?**
368
- - **Higher bars** = Bigger impact on your gross margin
369
- - Focus your attention on the top 3-5 drivers for maximum profitability improvement
370
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
 
372
  st.markdown("---")
373
 
374
- # -----------------------------
375
- # SEGMENT PERFORMANCE
376
- # -----------------------------
377
- st.markdown("## πŸ“ Performance by Business Segment")
 
 
378
 
379
- st.markdown("Not all business segments perform equally. Here's where you're winning and where there's opportunity:")
 
 
380
 
381
- # Calculate segment performance
382
- key_feats = [c for c in joined.columns if any(k in c for k in ["discount", "price_per_unit", "cost_per_unit","unit_cost","net_price"])]
383
- grp = joined.groupby(["product","region","channel"]).mean(numeric_only=True)
384
- rank_cols = [c for c in grp.columns if c in key_feats]
385
- segment_impact = grp[rank_cols].sum(axis=1).sort_values()
386
 
387
- col1, col2 = st.columns(2)
 
 
 
 
 
 
 
388
 
389
- with col1:
390
- st.markdown("### πŸ”΄ Segments Dragging Margin Down")
391
- worst = segment_impact.head(8)
392
- worst_df = pd.DataFrame({
393
- 'Segment': [f"{p} β€’ {r} β€’ {c}" for p, r, c in worst.index],
394
- 'Margin Impact': worst.values
395
- })
396
- worst_df['Impact Score'] = worst_df['Margin Impact'].apply(lambda x: 'πŸ”΄' * min(5, int(abs(x)*10)))
397
- st.dataframe(worst_df[['Segment', 'Impact Score']], hide_index=True, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
 
399
- with col2:
400
- st.markdown("### 🟒 Segments Lifting Margin Up")
401
- best = segment_impact.tail(8).sort_values(ascending=False)
402
- best_df = pd.DataFrame({
403
- 'Segment': [f"{p} β€’ {r} β€’ {c}" for p, r, c in best.index],
404
- 'Margin Impact': best.values
405
- })
406
- best_df['Performance'] = best_df['Margin Impact'].apply(lambda x: '🟒' * min(5, max(1, int(x*10))))
407
- st.dataframe(best_df[['Segment', 'Performance']], hide_index=True, use_container_width=True)
408
 
409
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
- # -----------------------------
412
- # WHAT-IF SIMULATOR
413
- # -----------------------------
414
- st.markdown("## 🎯 What-If Simulator: Test Your Strategies")
 
 
 
 
415
 
416
- st.markdown("""
417
- Use this simulator to model the financial impact of potential pricing or cost optimization strategies.
418
- Select a segment and adjust the levers to see the projected outcome.
419
- """)
420
 
421
- # Segment selector
422
- last_day = df["date"].max()
423
- seg_today = df[df["date"]==last_day][["product","region","channel"]].drop_duplicates().sort_values(["product","region","channel"])
424
- seg_options = seg_today.apply(lambda r: f"{r['product']} β€’ {r['region']} β€’ {r['channel']}", axis=1).tolist()
425
 
426
- col1, col2 = st.columns([2, 1])
 
 
 
 
 
427
 
428
- with col1:
429
- selected_segment = st.selectbox("**Select Business Segment:**", seg_options, key="segment_selector")
 
 
 
 
 
 
 
430
 
431
- with col2:
432
- st.markdown("**Scenario Type:**")
433
- scenario = st.radio("", ["Optimize Discount", "Reduce Costs", "Custom"], horizontal=True, label_visibility="collapsed")
434
-
435
- prod_sel, reg_sel, ch_sel = [s.strip() for s in selected_segment.split("β€’")]
436
- seg_hist = df[(df["product"]==prod_sel)&(df["region"]==reg_sel)&(df["channel"]==ch_sel)].sort_values("date")
437
- elasticity, _ = estimate_segment_elasticity(seg_hist, prod_sel, reg_sel, ch_sel)
438
-
439
- # Pre-set scenarios
440
- if scenario == "Optimize Discount":
441
- delta_disc = -2.0
442
- delta_cost = 0.0
443
- st.info("πŸ“‰ Testing a 2 percentage point discount reduction to improve margin")
444
- elif scenario == "Reduce Costs":
445
- delta_disc = 0.0
446
- delta_cost = -3.0
447
- st.info("πŸ’° Testing a $3 reduction in unit cost through operational efficiency")
448
- else:
449
- col1, col2 = st.columns(2)
450
- with col1:
451
- delta_disc = st.slider("Adjust Discount (percentage points)", -10.0, 10.0, -2.0, 0.5,
452
- help="Negative = tighter discount, Positive = deeper discount")
453
- with col2:
454
- delta_cost = st.slider("Adjust Unit Cost ($)", -10.0, 10.0, 0.0, 0.5,
455
- help="Negative = cost reduction, Positive = cost increase")
456
-
457
- # Run simulation
458
- sim_res = simulate_action(seg_hist, elasticity, delta_discount=delta_disc/100.0, delta_unit_cost=delta_cost)
459
-
460
- if sim_res:
461
- st.markdown("### πŸ“ˆ Projected Impact")
462
-
463
- # Results in clean columns
464
- metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)
465
-
466
- with metric_col1:
467
- price_change = ((sim_res['new_price'] - sim_res['baseline_price']) / sim_res['baseline_price']) * 100
468
- st.metric("Price per Unit", f"${sim_res['new_price']:.2f}", f"{price_change:+.1f}%")
469
-
470
- with metric_col2:
471
- cost_change = ((sim_res['new_cost'] - sim_res['baseline_cost']) / sim_res['baseline_cost']) * 100
472
- st.metric("Cost per Unit", f"${sim_res['new_cost']:.2f}", f"{cost_change:+.1f}%")
473
-
474
- with metric_col3:
475
- qty_change = ((sim_res['new_qty'] - sim_res['baseline_qty']) / sim_res['baseline_qty']) * 100
476
- st.metric("Volume", f"{sim_res['new_qty']:.0f} units", f"{qty_change:+.1f}%")
477
-
478
- with metric_col4:
479
- gm_change = (sim_res['gm1_pct'] - sim_res['gm0_pct']) * 100
480
- st.metric("Gross Margin %", f"{sim_res['gm1_pct']*100:.1f}%", f"{gm_change:+.1f} pts")
481
-
482
- # Financial impact
483
- if sim_res['gm_delta_value'] > 0:
484
- st.markdown(f"""
485
- <div class="recommendation-card">
486
- <h4>βœ… Positive Impact: +${sim_res['gm_delta_value']:.2f} in daily gross profit</h4>
487
- <p>This strategy would <strong>improve profitability</strong> for this segment.
488
- Expected price elasticity of {elasticity:.2f} means volume {('decreases' if elasticity < 0 and delta_disc < 0 else 'adjusts')}
489
- as prices change, but margin improvement outweighs volume impact.</p>
490
- </div>
491
- """, unsafe_allow_html=True)
492
- else:
493
  st.markdown(f"""
494
- <div class="warning-card">
495
- <h4>⚠️ Negative Impact: ${sim_res['gm_delta_value']:.2f} in daily gross profit</h4>
496
- <p>This strategy would <strong>reduce profitability</strong> for this segment.
497
- Consider alternative approaches or test smaller adjustments.</p>
 
 
 
 
498
  </div>
499
  """, unsafe_allow_html=True)
500
 
501
- st.markdown("---")
502
 
503
- # -----------------------------
504
- # AI RECOMMENDATIONS
505
- # -----------------------------
506
- st.markdown("## πŸ’‘ AI-Powered Recommendations")
 
 
 
 
507
 
508
- st.markdown("""
509
- Based on the analysis of all segments, here are the top opportunities to improve profitability.
510
- These recommendations are ranked by expected financial impact.
511
- """)
512
-
513
- # Generate recommendations
514
- worst_keys = segment_impact.head(20).index.tolist()
515
- recs = []
516
- for p, r, c in worst_keys:
517
- hist = df[(df["product"]==p)&(df["region"]==r)&(df["channel"]==c)].sort_values("date")
518
- if hist.empty:
519
- continue
520
- eps, _ = estimate_segment_elasticity(hist, p, r, c)
521
- prop_disc_pts = -np.clip(abs(segment_impact[(p,r,c)])*10, 0.5, 3.0)
522
- sim = simulate_action(hist, eps, delta_discount=prop_disc_pts/100.0, delta_unit_cost=0.0)
523
- if sim and sim["gm_delta_value"] > 0:
524
- recs.append({
525
- "Segment": f"{p} β€’ {r} β€’ {c}",
526
- "Recommended Action": f"Reduce discount by {abs(prop_disc_pts):.1f}%",
527
- "Expected Daily Uplift": f"${sim['gm_delta_value']:.2f}",
528
- "New Margin %": f"{sim['gm1_pct']*100:.1f}%",
529
- "Risk Level": "Low" if abs(eps) < 0.5 else "Medium"
530
- })
531
-
532
- rec_df = pd.DataFrame(recs).sort_values("Expected Daily Uplift", ascending=False).head(10)
533
-
534
- if not rec_df.empty:
535
- st.dataframe(rec_df, hide_index=True, use_container_width=True)
536
-
537
- total_potential = rec_df["Expected Daily Uplift"].str.replace("$", "").astype(float).sum()
538
- st.success(f"🎯 **Total Daily Profit Opportunity: ${total_potential:.2f}** | Annualized: ${total_potential * 365:,.0f}")
539
-
540
- # Download button
541
- csv = rec_df.to_csv(index=False).encode('utf-8')
542
- st.download_button(
543
- label="πŸ“₯ Download Full Recommendations (CSV)",
544
- data=csv,
545
- file_name=f"profitability_recommendations_{datetime.now().strftime('%Y%m%d')}.csv",
546
- mime="text/csv"
547
- )
548
- else:
549
- st.info("No high-confidence recommendations available at this time. Current segment performance is well-optimized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
 
551
  st.markdown("---")
552
-
553
- # Footer
554
- st.caption("πŸ”’ **Demo Environment** | Data shown is synthetic for demonstration. Connect to your SAP system for live insights.")
555
- st.caption(f"Model Performance: RΒ² = {metrics['r2']:.3f} | Analyzing {len(df):,} transactions across {len(df['product'].unique())} products")
 
 
 
 
3
  import pandas as pd
4
  import plotly.express as px
5
  import plotly.graph_objects as go
6
+ from plotly.subplots import make_subplots
7
  import shap
8
  import matplotlib.pyplot as plt
 
9
  from datetime import datetime, timedelta
10
  from sklearn.model_selection import train_test_split
11
  from sklearn.compose import ColumnTransformer
 
14
  from sklearn.ensemble import RandomForestRegressor
15
  from sklearn.linear_model import LinearRegression
16
  from sklearn.metrics import r2_score, mean_absolute_error
17
+ import warnings
18
+ warnings.filterwarnings('ignore')
19
+
20
+ # Enhanced page config
21
+ st.set_page_config(
22
+ page_title="Profitability Intelligence Suite",
23
+ page_icon="πŸ“Š",
24
+ layout="wide",
25
+ initial_sidebar_state="collapsed"
26
+ )
27
 
28
+ # Custom CSS for premium look
 
 
29
  st.markdown("""
30
  <style>
31
  .main-header {
32
+ font-size: 2.8rem;
33
  font-weight: 700;
34
  color: #1f77b4;
35
+ text-align: center;
36
  margin-bottom: 0.5rem;
37
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
38
  }
39
  .sub-header {
40
+ font-size: 1.2rem;
41
  color: #666;
42
+ text-align: center;
43
  margin-bottom: 2rem;
44
  }
45
  .insight-box {
46
+ background: #f8f9fa;
47
+ border-left: 5px solid #1f77b4;
48
  padding: 1.5rem;
 
 
49
  margin: 1rem 0;
50
+ border-radius: 8px;
51
+ box-shadow: 0 4px 8px rgba(0,0,0,0.05);
52
  }
53
+ .recommendation-card {
54
  background: white;
55
+ border: 2px solid #e9ecef;
56
+ border-radius: 12px;
57
  padding: 1.5rem;
58
+ margin: 1rem 0;
59
+ box-shadow: 0 4px 12px rgba(0,0,0,0.08);
60
+ transition: transform 0.2s;
61
  }
62
+ .recommendation-card:hover {
63
+ transform: translateY(-5px);
64
+ box-shadow: 0 8px 20px rgba(0,0,0,0.12);
 
 
 
65
  }
66
+ .positive-impact {
67
+ color: #28a745;
68
+ font-weight: 700;
69
+ font-size: 1.5rem;
70
+ }
71
+ .stTabs [data-baseweb="tab-list"] {
72
+ gap: 2rem;
73
+ }
74
+ .stTabs [data-baseweb="tab"] {
75
+ height: 3rem;
76
+ font-size: 1.1rem;
77
+ font-weight: 600;
78
  }
79
  </style>
80
  """, unsafe_allow_html=True)
81
 
82
  # -----------------------------
83
+ # Data Generation
84
  # -----------------------------
85
  @st.cache_data(show_spinner=False)
86
+ def generate_synthetic_data(days=60, seed=42, rows_per_day=600):
87
  rng = np.random.default_rng(seed)
88
  start_date = datetime.today().date() - timedelta(days=days)
89
  dates = pd.date_range(start_date, periods=days, freq="D")
 
90
  products = ["Premium Widget", "Standard Widget", "Economy Widget", "Deluxe Widget"]
91
+ regions = ["Americas", "EMEA", "Asia Pacific"]
92
  channels = ["Direct Sales", "Distribution Partners", "E-Commerce"]
93
 
94
  base_price = {"Premium Widget": 120, "Standard Widget": 135, "Economy Widget": 110, "Deluxe Widget": 150}
95
+ base_cost = {"Premium Widget": 70, "Standard Widget": 88, "Economy Widget": 60, "Deluxe Widget": 95}
96
+ region_price_bump = {"Americas": 1.00, "EMEA": 1.03, "Asia Pacific": 0.97}
97
+ region_cost_bump = {"Americas": 1.00, "EMEA": 1.02, "Asia Pacific": 1.01}
 
 
98
  channel_discount_mean = {"Direct Sales": 0.06, "Distribution Partners": 0.12, "E-Commerce": 0.04}
99
+ channel_discount_std = {"Direct Sales": 0.02, "Distribution Partners": 0.03, "E-Commerce": 0.02}
100
 
101
  seg_epsilon = {}
102
  for p in products:
 
117
 
118
  n = rows_per_day
119
  prod = rng.choice(products, size=n, p=[0.35, 0.3, 0.2, 0.15])
120
+ reg = rng.choice(regions, size=n, p=[0.4, 0.35, 0.25])
121
+ ch = rng.choice(channels, size=n, p=[0.45, 0.35, 0.20])
122
 
123
  base_p = np.array([base_price[x] for x in prod]) * np.array([region_price_bump[x] for x in reg])
124
+ base_c = np.array([base_cost[x] for x in prod]) * np.array([region_cost_bump[x] for x in reg])
125
 
126
  discount = np.clip(
127
  np.array([channel_discount_mean[x] for x in ch]) +
128
  rng.normal(0, [channel_discount_std[x] for x in ch]), 0, 0.45
129
  )
130
+
131
  list_price = rng.normal(base_p, 5)
132
  net_price = np.clip(list_price * (1 - discount), 20, None)
133
  unit_cost = np.clip(rng.normal(base_c, 4), 10, None)
 
138
  qty = np.maximum(1, rng.poisson(8 * dow_mult * macro * qty_mu))
139
 
140
  revenue = net_price * qty
141
+ cogs = unit_cost * qty
142
+ gm_val = revenue - cogs
143
+ gm_pct = np.where(revenue > 0, gm_val / revenue, 0.0)
144
 
145
  for i in range(n):
146
  records.append({
147
+ "date": d,
148
+ "product": prod[i],
149
+ "region": reg[i],
150
+ "channel": ch[i],
151
+ "list_price": float(list_price[i]),
152
+ "discount_pct": float(discount[i]),
153
+ "net_price": float(net_price[i]),
154
+ "unit_cost": float(unit_cost[i]),
155
+ "qty": int(qty[i]),
156
+ "revenue": float(revenue[i]),
157
+ "cogs": float(cogs[i]),
158
+ "gm_value": float(gm_val[i]),
159
+ "gm_pct": float(gm_pct[i]),
160
+ "dow": dow
161
  })
 
162
 
163
+ df = pd.DataFrame(records)
164
+ return df
165
+
166
+ @st.cache_data(show_spinner=False)
167
+ def build_features(_df):
168
+ df = _df.copy()
169
  feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
170
  feats_cat = ["product", "region", "channel"]
 
171
  df = df.sort_values("date").copy()
172
  seg = ["product", "region", "channel"]
173
  df["price_per_unit"] = df["net_price"]
174
+ df["cost_per_unit"] = df["unit_cost"]
 
175
  df["roll7_qty"] = df.groupby(seg)["qty"].transform(lambda s: s.rolling(7, min_periods=1).median())
176
  df["roll7_price"] = df.groupby(seg)["price_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
177
+ df["roll7_cost"] = df.groupby(seg)["cost_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
 
178
  feats_num += ["price_per_unit", "cost_per_unit", "roll7_qty", "roll7_price", "roll7_cost"]
179
+ target = "gm_pct"
180
+ return df, feats_num, feats_cat, target
181
 
182
  @st.cache_resource(show_spinner=False)
183
+ def train_model(feats_num, feats_cat, target, _X, _y):
 
 
 
184
  pre = ColumnTransformer(
185
  transformers=[
186
  ("cat", OneHotEncoder(handle_unknown="ignore"), feats_cat),
187
  ("num", "passthrough", feats_num),
188
  ]
189
  )
190
+ model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)
191
  pipe = Pipeline([("pre", pre), ("rf", model)])
192
+ X_train, X_test, y_train, y_test = train_test_split(_X, _y, test_size=0.25, shuffle=False, random_state=42)
 
193
  pipe.fit(X_train, y_train)
194
  pred = pipe.predict(X_test)
195
+ r2 = r2_score(y_test, pred)
196
+ mae = mean_absolute_error(y_test, pred)
197
+ return pipe, {"r2": r2, "mae": mae}, X_test
198
 
199
+ @st.cache_data(show_spinner=False)
200
+ def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=500):
201
+ try:
202
+ np.random.seed(42)
203
+ # Get sample
204
+ X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
205
 
206
+ if len(X_sample) > shap_sample:
207
+ sample_idx = np.random.choice(len(X_sample), size=shap_sample, replace=False)
208
+ X_sample = X_sample.iloc[sample_idx]
 
 
 
209
 
210
+ # Transform data
211
+ X_t = _pipe.named_steps["pre"].transform(X_sample)
212
+ if hasattr(X_t, 'toarray'):
213
+ X_t = X_t.toarray()
214
 
215
+ # Get feature names
216
+ cat_features = list(_pipe.named_steps["pre"].named_transformers_["cat"].get_feature_names_out(feats_cat))
217
+ feature_names = cat_features + feats_num
218
+
219
+ # Compute SHAP
220
+ explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
221
+ shap_values = explainer.shap_values(X_t)
222
+
223
+ # Create DataFrame
224
+ shap_df = pd.DataFrame(shap_values, columns=feature_names)
225
 
226
+ return shap_df, X_sample.reset_index(drop=True), feature_names
227
+ except Exception as e:
228
+ st.error(f"Error computing SHAP: {str(e)}")
229
+ return None, None, None
 
 
230
 
231
def estimate_segment_elasticity(df, product, region, channel):
    """Estimate price elasticity of demand for one product/region/channel segment.

    Fits a log-log regression (log qty ~ log net_price); the slope is the
    elasticity. Falls back to a conservative default of -0.5 when the segment
    is too small (< 100 rows), has no price or quantity variation, or the fit
    fails.

    Returns
    -------
    (elasticity, fitted) : float slope, and a bool flag indicating whether it
    came from an actual regression (False means the -0.5 default was used).
    """
    seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
    # Need enough observations and real variation for a meaningful fit; the
    # 1e-6 std thresholds guard against constant-price / constant-qty segments.
    if len(seg_df) < 100 or seg_df["net_price"].std() < 1e-6 or seg_df["qty"].std() < 1e-6:
        return -0.5, False
    try:
        # Clip to avoid log(0); the log-log slope is the elasticity by definition.
        x = np.log(np.clip(seg_df["net_price"].values, 1e-6, None)).reshape(-1,1)
        y = np.log(np.clip(seg_df["qty"].values, 1e-6, None))
        lin = LinearRegression().fit(x, y)
        return float(lin.coef_[0]), True
    except Exception:
        # Narrowed from a bare `except:` which would also swallow
        # KeyboardInterrupt / SystemExit; a failed fit just yields the default.
        return -0.5, False
242
 
243
def simulate_pricing_action(segment_df, elasticity, discount_reduction_pct):
    """Simulate reducing the discount on a segment's most recent transaction.

    Uses a constant-elasticity demand model: quantity scales with
    (new_price / old_price) ** elasticity. Unit cost is held fixed, since a
    pricing action does not change COGS per unit.

    Parameters
    ----------
    segment_df : DataFrame of the segment's history; the last row is the baseline.
        Must contain list_price, net_price, unit_cost, qty, discount_pct columns.
    elasticity : float price elasticity (typically negative).
    discount_reduction_pct : discount cut in percentage points (positive reduces
        the discount, i.e. raises the realized price).

    Returns
    -------
    dict of baseline/new price, cost, qty, discount (in %), GM deltas and
    revenue delta — or None when the segment is empty or the simulation fails.
    """
    if segment_df.empty:
        return None
    try:
        base = segment_df.iloc[-1]  # most recent transaction is the baseline
        p0 = base["net_price"]
        c0 = base["unit_cost"]
        q0 = base["qty"]
        d0 = base["discount_pct"]

        # New discount is bounded to [0%, 45%]; the new realized price is
        # re-derived from list price, floored at $0.01.
        new_discount = np.clip(d0 - (discount_reduction_pct/100), 0.0, 0.45)
        p1 = max(0.01, base["list_price"] * (1 - new_discount))
        c1 = c0  # unit cost is unaffected by a pricing action

        # Constant-elasticity volume response; guard against a zero/negative
        # baseline price (undefined ratio) by holding volume flat.
        if p0 <= 0:
            q1 = q0
        else:
            q1 = max(0.0, q0 * (p1 / p0) ** elasticity)

        rev0 = p0 * q0
        cogs0 = c0 * q0
        rev1 = p1 * q1
        cogs1 = c1 * q1

        gm_delta_value = (rev1 - cogs1) - (rev0 - cogs0)
        gm0_pct = (rev0 - cogs0)/rev0 if rev0>0 else 0.0
        gm1_pct = (rev1 - cogs1)/rev1 if rev1>0 else 0.0

        return {
            "baseline_price": p0, "new_price": p1,
            "baseline_cost": c0, "new_cost": c1,
            "baseline_qty": q0, "new_qty": q1,
            "baseline_discount": d0*100, "new_discount": new_discount*100,
            "gm_delta_value": gm_delta_value,
            "gm0_pct": gm0_pct, "gm1_pct": gm1_pct,
            "revenue_delta": rev1 - rev0
        }
    except Exception:
        # Narrowed from a bare `except:`; a malformed row simply means
        # "no simulation available" to the caller.
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
282
 
283
# -----------------------------
# Main App
# -----------------------------

# Page header — styled via the CSS classes injected at the top of the file.
st.markdown('<h1 class="main-header">🎯 Profitability Intelligence Suite</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">AI-Powered Margin Analysis & Strategic Recommendations</p>', unsafe_allow_html=True)

# Generate data — synthetic SAP-style transactions plus model-ready features.
# (generate_synthetic_data / build_features are defined earlier in this file.)
with st.spinner("πŸ”„ Loading business data..."):
    df = generate_synthetic_data(days=60, seed=42, rows_per_day=600)
    df_feat, feats_num, feats_cat, target = build_features(df)

# Calculate KPIs: aggregate transactions to one row per day.
daily = df.groupby("date").agg(
    revenue=("revenue","sum"),
    cogs=("cogs","sum"),
    gm_value=("gm_value","sum")
).reset_index()
# Guard against zero-revenue days to avoid division by zero.
daily["gm_pct"] = np.where(daily["revenue"]>0, daily["gm_value"]/daily["revenue"], 0.0)

# Reference rows for day-over-day / week-over-week deltas; fall back to today
# when the history is too short.
today_row = daily.iloc[-1]
yesterday_row = daily.iloc[-2] if len(daily) > 1 else today_row
week_ago_row = daily.iloc[-8] if len(daily) > 7 else today_row
roll7 = daily["gm_pct"].tail(7).mean()

# Executive Dashboard Section
st.markdown("### πŸ“Š Executive Performance Dashboard")

col1, col2, col3, col4 = st.columns(4)

with col1:
    # Day-over-day gross-margin change, in percentage points.
    delta_gm = (today_row["gm_pct"] - yesterday_row["gm_pct"]) * 100
    st.metric(
        label="Gross Margin %",
        value=f"{today_row['gm_pct']*100:.1f}%",
        delta=f"{delta_gm:+.2f}pp vs yesterday",
        delta_color="normal"
    )

with col2:
    # Day-over-day revenue change in %, guarded against a zero-revenue yesterday.
    delta_rev = ((today_row["revenue"] - yesterday_row["revenue"]) / yesterday_row["revenue"] * 100) if yesterday_row["revenue"] > 0 else 0
    st.metric(
        label="Revenue (Today)",
        value=f"${today_row['revenue']/1e6:.2f}M",
        delta=f"{delta_rev:+.1f}% DoD",
        delta_color="normal"
    )

with col3:
    # Absolute gross-margin dollars, with the day-over-day delta in $M.
    st.metric(
        label="Gross Margin $ (Today)",
        value=f"${today_row['gm_value']/1e6:.2f}M",
        delta=f"${(today_row['gm_value'] - yesterday_row['gm_value'])/1e6:+.2f}M",
        delta_color="normal"
    )

with col4:
    # Week-over-week GM% change, in percentage points.
    avg_gm_vs_week = (today_row["gm_pct"] - week_ago_row["gm_pct"]) * 100
    st.metric(
        label="7-Day Avg GM%",
        value=f"{roll7*100:.1f}%",
        delta=f"{avg_gm_vs_week:+.2f}pp WoW",
        delta_color="normal"
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
# Trend visualization — two side-by-side panels: GM% on the left, revenue and
# GM dollars (on a secondary axis) on the right.
st.markdown("#### πŸ“ˆ Performance Trend Analysis")

# NOTE(review): make_subplots is assumed to be imported at the top of the file
# (from plotly.subplots) — confirm against the import block.
fig_trends = make_subplots(
    rows=1, cols=2,
    subplot_titles=("Gross Margin % Trend", "Revenue & Margin $ Trend"),
    specs=[[{"secondary_y": False}, {"secondary_y": True}]]
)

# Left panel: GM% as an area-filled line.
fig_trends.add_trace(
    go.Scatter(
        x=daily["date"],
        y=daily["gm_pct"]*100,
        name="GM%",
        line=dict(color="#1f77b4", width=3),
        fill='tozeroy',
        fillcolor="rgba(31, 119, 180, 0.1)"
    ),
    row=1, col=1
)

# Right panel, primary axis: daily revenue in $M.
fig_trends.add_trace(
    go.Scatter(
        x=daily["date"],
        y=daily["revenue"]/1e6,
        name="Revenue",
        line=dict(color="#2ca02c", width=2)
    ),
    row=1, col=2
)

# Right panel, secondary axis: GM dollars in $M (dashed to distinguish it).
fig_trends.add_trace(
    go.Scatter(
        x=daily["date"],
        y=daily["gm_value"]/1e6,
        name="GM Value",
        line=dict(color="#ff7f0e", width=2, dash="dash")
    ),
    row=1, col=2, secondary_y=True
)

fig_trends.update_xaxes(title_text="Date", row=1, col=1)
fig_trends.update_xaxes(title_text="Date", row=1, col=2)
fig_trends.update_yaxes(title_text="Gross Margin %", row=1, col=1)
fig_trends.update_yaxes(title_text="Revenue ($M)", row=1, col=2)
fig_trends.update_yaxes(title_text="GM Value ($M)", row=1, col=2, secondary_y=True)

fig_trends.update_layout(height=400, showlegend=True, hovermode="x unified")
st.plotly_chart(fig_trends, use_container_width=True)
398
 
399
st.markdown("---")

# Train model — fit the RandomForest pipeline on the engineered features.
# train_model is cached, so reruns of the app reuse the fitted pipeline.
with st.spinner("πŸ€– Training AI model..."):
    X = df_feat[feats_num + feats_cat].copy()
    y = df_feat[target].copy()
    pipe, metrics, X_test = train_model(feats_num, feats_cat, target, X, y)
    st.success(f"βœ… Model trained: RΒ² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")

# Compute SHAP once for all tabs — may return (None, None, None) on failure,
# which each tab handles explicitly.
with st.spinner("πŸ”¬ Analyzing profitability drivers..."):
    shap_df, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=500)

# Tabs for different sections
tab1, tab2, tab3 = st.tabs(["πŸ” Key Drivers Analysis", "🎯 Strategic Recommendations", "πŸ§ͺ What-If Simulator"])
 
 
 
414
 
415
with tab1:
    st.markdown("### Understanding What Drives Your Profitability")
    st.markdown("""
    <div class="insight-box">
    <b>πŸŽ“ Business Insight:</b> This analysis reveals which business factors have the strongest impact on gross margin.
    Understanding these drivers helps prioritize strategic initiatives and operational improvements.
    </div>
    """, unsafe_allow_html=True)

    if shap_df is not None and X_test_sample is not None:
        # Calculate mean absolute SHAP — global importance ranking per feature.
        mean_abs = shap_df.abs().mean().sort_values(ascending=False)

        # Map technical names to business names for display.
        business_name_map = {
            "discount_pct": "Discount Level",
            "unit_cost": "Unit Cost",
            "net_price": "Net Selling Price",
            "list_price": "List Price",
            "qty": "Order Quantity",
            "price_per_unit": "Price per Unit",
            "cost_per_unit": "Cost per Unit",
            "roll7_qty": "7-Day Avg Quantity",
            "roll7_price": "7-Day Avg Price",
            "roll7_cost": "7-Day Avg Cost",
            "dow": "Day of Week"
        }

        # Get top drivers with business names. Substring matching uses the
        # first map key that matches (dict insertion order), so e.g. a feature
        # containing "qty" matches "Order Quantity" before "roll7_qty" —
        # NOTE(review): potential mislabel for roll7_* features; confirm intent.
        top_drivers = []
        for feat, val in mean_abs.head(10).items():
            bus_name = feat
            for key, name in business_name_map.items():
                if key in feat.lower():
                    bus_name = name
                    break
            # One-hot columns from the encoder carry a "cat__" prefix; strip it
            # and present them as "Product/Region/Channel: <value>".
            if feat.startswith("cat__"):
                parts = feat.replace("cat__", "").replace("product_", "").replace("region_", "").replace("channel_", "")
                if "product" in feat.lower():
                    bus_name = f"Product: {parts}"
                elif "region" in feat.lower():
                    bus_name = f"Region: {parts}"
                elif "channel" in feat.lower():
                    bus_name = f"Channel: {parts}"
            top_drivers.append({"Driver": bus_name, "Impact Score": val})

        drivers_df = pd.DataFrame(top_drivers)

        col_a, col_b = st.columns([1, 1])

        with col_a:
            st.markdown("#### Top 10 Profitability Drivers")

            # Horizontal bar chart; [::-1] reverses so the top driver renders
            # at the top of the chart.
            fig_drivers = go.Figure()
            fig_drivers.add_trace(go.Bar(
                y=drivers_df["Driver"][::-1],
                x=drivers_df["Impact Score"][::-1],
                orientation='h',
                marker=dict(
                    color=drivers_df["Impact Score"][::-1],
                    colorscale='Blues',
                    line=dict(color='rgb(8,48,107)', width=1.5)
                ),
                text=[f"{v:.4f}" for v in drivers_df["Impact Score"][::-1]],
                textposition='outside',
            ))

            fig_drivers.update_layout(
                title="Ranked by Average Impact on Gross Margin",
                xaxis_title="Impact Score",
                yaxis_title="",
                height=500,
                showlegend=False
            )
            st.plotly_chart(fig_drivers, use_container_width=True)

        with col_b:
            st.markdown("#### Key Insights")

            top_3 = drivers_df.head(3)

            st.markdown(f"""
            <div class="insight-box">
            <b>πŸ₯‡ Primary Driver:</b> {top_3.iloc[0]['Driver']}<br>
            <small>Impact Score: {top_3.iloc[0]['Impact Score']:.4f}</small>
            </div>
            """, unsafe_allow_html=True)

            st.markdown(f"""
            <div class="insight-box">
            <b>πŸ₯ˆ Secondary Driver:</b> {top_3.iloc[1]['Driver']}<br>
            <small>Impact Score: {top_3.iloc[1]['Impact Score']:.4f}</small>
            </div>
            """, unsafe_allow_html=True)

            st.markdown(f"""
            <div class="insight-box">
            <b>πŸ₯‰ Tertiary Driver:</b> {top_3.iloc[2]['Driver']}<br>
            <small>Impact Score: {top_3.iloc[2]['Impact Score']:.4f}</small>
            </div>
            """, unsafe_allow_html=True)

            st.markdown("#### Segment Performance")

            # Safely join SHAP with original data — row alignment relies on
            # both frames being index-reset copies of the same sample.
            try:
                cat_cols = ["product", "region", "channel"]
                joined = pd.concat([X_test_sample[cat_cols].reset_index(drop=True),
                                    shap_df.reset_index(drop=True)], axis=1)

                # Mean SHAP per segment; net_impact sums the per-feature means.
                grp = joined.groupby(cat_cols, as_index=False).mean(numeric_only=True)
                key_shap_cols = [c for c in shap_df.columns if c in grp.columns]
                grp["net_impact"] = grp[key_shap_cols].sum(axis=1)

                top_negative = grp.nsmallest(5, "net_impact")
                top_positive = grp.nlargest(5, "net_impact")

                st.markdown("**⚠️ Segments Reducing Margin:**")
                for _, row in top_negative.head(3).iterrows():
                    st.markdown(f"β€’ **{row['product']}** β€’ {row['region']} β€’ {row['channel']} *(Impact: {row['net_impact']:.4f})*")

                st.markdown("**βœ… Segments Boosting Margin:**")
                for _, row in top_positive.head(3).iterrows():
                    st.markdown(f"β€’ **{row['product']}** β€’ {row['region']} β€’ {row['channel']} *(Impact: {row['net_impact']:.4f})*")
            except Exception as e:
                st.warning(f"Unable to compute segment analysis: {str(e)}")
    else:
        st.error("Unable to compute driver analysis. Please check your data.")
543
 
544
with tab2:
    st.markdown("### AI-Generated Strategic Recommendations")
    st.markdown("""
    <div class="insight-box">
    <b>πŸ’‘ How This Works:</b> The AI identifies segments with margin pressure and suggests specific pricing actions
    to improve profitability. Recommendations are ranked by expected financial impact.
    </div>
    """, unsafe_allow_html=True)

    if shap_df is not None and X_test_sample is not None:
        with st.spinner("🧠 Generating strategic recommendations..."):
            try:
                # Join the raw sample with its SHAP values and build a
                # product|region|channel composite key per row.
                joined = pd.concat([X_test_sample.reset_index(drop=True), shap_df.reset_index(drop=True)], axis=1)
                joined["key"] = joined["product"] + "|" + joined["region"] + "|" + joined["channel"]

                # Score segments by their mean SHAP on pricing-related features;
                # the most negative totals are the margin-pressured segments.
                cand_cols = [c for c in joined.columns if ("discount" in c.lower() or "cost" in c.lower() or "price" in c.lower()) and c in shap_df.columns]
                seg_scores = joined.groupby("key")[cand_cols].mean().sum(axis=1)
                worst_keys = seg_scores.sort_values().head(15).index.tolist()

                recs = []
                for key in worst_keys:
                    p, r, c = key.split("|")
                    hist = df[(df["product"]==p)&(df["region"]==r)&(df["channel"]==c)].sort_values("date")
                    # Skip thin segments — elasticity/simulation need history.
                    if hist.empty or len(hist) < 50:
                        continue

                    # Proposed discount cut scales with the segment's SHAP
                    # pressure, bounded to 1–3 percentage points.
                    eps, _ = estimate_segment_elasticity(hist, p, r, c)
                    prop_disc_pts = np.clip(abs(seg_scores[key])*10, 1.0, 3.0)
                    sim = simulate_pricing_action(hist, eps, prop_disc_pts)

                    # Only keep actions that actually improve gross margin.
                    if sim is None or sim["gm_delta_value"] <= 0:
                        continue

                    # Annualize: per-transaction uplift x transactions/day x 365.
                    daily_transactions = len(hist) / ((hist["date"].max() - hist["date"].min()).days + 1)
                    annual_impact = sim["gm_delta_value"] * daily_transactions * 365

                    recs.append({
                        "Segment": f"{p}",
                        "Region": r,
                        "Channel": c,
                        "Current Discount": f"{sim['baseline_discount']:.1f}%",
                        "Recommended Discount": f"{sim['new_discount']:.1f}%",
                        "Expected GM Uplift": sim["gm_delta_value"],
                        "Annual Impact Estimate": annual_impact,
                        "Current GM%": sim["gm0_pct"]*100,
                        "Projected GM%": sim["gm1_pct"]*100
                    })

                # BUGFIX: only sort when recommendations exist. Sorting an
                # empty DataFrame by a missing column raises KeyError, which
                # the broad except below turned into an error message instead
                # of the intended "no opportunities" info below.
                recs_df = pd.DataFrame(recs)
                if not recs_df.empty:
                    recs_df = recs_df.sort_values("Expected GM Uplift", ascending=False)

                if len(recs_df) > 0:
                    st.markdown("#### πŸ† Top 3 Priority Actions")

                    for i, (idx, rec) in enumerate(recs_df.head(3).iterrows()):
                        st.markdown(f"""
                        <div class="recommendation-card">
                        <h4>#{i+1}: {rec['Segment']} β€’ {rec['Region']} β€’ {rec['Channel']}</h4>
                        <p style="font-size: 1.1rem; margin: 0.5rem 0;">
                        <b>Recommended Action:</b> Reduce discount from <b>{rec['Current Discount']}</b> to <b>{rec['Recommended Discount']}</b>
                        </p>
                        <p style="font-size: 1rem; color: #666; margin: 0.5rem 0;">
                        Current GM: <b>{rec['Current GM%']:.1f}%</b> β†’ Projected GM: <b style="color: #28a745;">{rec['Projected GM%']:.1f}%</b>
                        </p>
                        <p class="positive-impact">
                        πŸ’° Expected Daily Impact: ${rec['Expected GM Uplift']:.2f}
                        </p>
                        <p style="font-size: 0.95rem; color: #666;">
                        πŸ“Š Estimated Annual Impact: <b>${rec['Annual Impact Estimate']/1e3:.1f}K</b>
                        </p>
                        </div>
                        """, unsafe_allow_html=True)

                    st.markdown("---")
                    st.markdown("#### πŸ“‹ Complete Recommendations List")
                    st.dataframe(recs_df, use_container_width=True, height=400)

                    # Aggregate opportunity across all recommendations.
                    total_daily_impact = recs_df["Expected GM Uplift"].sum()
                    total_annual_impact = recs_df["Annual Impact Estimate"].sum()

                    st.markdown(f"""
                    <div class="insight-box" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border: none;">
                    <h3 style="color: white; margin-top: 0;">πŸ’Ž Total Opportunity</h3>
                    <p style="font-size: 1.3rem; margin: 0.5rem 0;">
                    <b>Daily GM Impact:</b> ${total_daily_impact:.2f}
                    </p>
                    <p style="font-size: 1.6rem; margin: 0.5rem 0;">
                    <b>Estimated Annual Impact:</b> ${total_annual_impact/1e6:.2f}M
                    </p>
                    </div>
                    """, unsafe_allow_html=True)
                else:
                    st.info("No significant optimization opportunities detected in current data.")
            except Exception as e:
                st.error(f"Error generating recommendations: {str(e)}")
    else:
        st.error("Unable to generate recommendations. Please check your data.")
640
 
641
with tab3:
    st.markdown("### Custom What-If Analysis")
    st.markdown("""
    <div class="insight-box">
    <b>πŸ§ͺ Interactive Simulation:</b> Test different pricing strategies for specific segments to understand
    the potential impact on revenue, volume, and profitability.
    </div>
    """, unsafe_allow_html=True)

    # Restrict the pickers to segment combinations that traded on the last day.
    last_day = df["date"].max()
    seg_today = df[df["date"]==last_day][["product","region","channel"]].drop_duplicates()

    col_sim1, col_sim2, col_sim3 = st.columns(3)

    with col_sim1:
        selected_product = st.selectbox("πŸ“¦ Select Product", sorted(seg_today["product"].unique()))
    with col_sim2:
        selected_region = st.selectbox("🌍 Select Region", sorted(seg_today["region"].unique()))
    with col_sim3:
        selected_channel = st.selectbox("πŸ›’ Select Channel", sorted(seg_today["channel"].unique()))

    # Full history of the chosen segment, oldest first (simulation uses the
    # last row as its baseline).
    seg_hist = df[
        (df["product"]==selected_product) &
        (df["region"]==selected_region) &
        (df["channel"]==selected_channel)
    ].sort_values("date")

    # Require at least 50 rows so the elasticity fallback/simulation is sensible.
    if not seg_hist.empty and len(seg_hist) >= 50:
        elasticity, _ = estimate_segment_elasticity(seg_hist, selected_product, selected_region, selected_channel)
        current = seg_hist.iloc[-1]

        st.markdown(f"""
        <div class="insight-box">
        <b>πŸ“Š Current State:</b><br>
        β€’ Current Discount: <b>{current['discount_pct']*100:.1f}%</b><br>
        β€’ Net Price: <b>${current['net_price']:.2f}</b><br>
        β€’ Unit Cost: <b>${current['unit_cost']:.2f}</b><br>
        β€’ Avg Daily Volume: <b>{seg_hist.tail(7)['qty'].mean():.0f} units</b><br>
        β€’ Current GM%: <b>{current['gm_pct']*100:.1f}%</b><br>
        β€’ Price Elasticity: <b>{elasticity:.2f}</b>
        </div>
        """, unsafe_allow_html=True)

        st.markdown("#### 🎯 Test Pricing Strategy")

        discount_change = st.slider(
            "Adjust Discount Level (percentage points)",
            min_value=-10.0,
            max_value=5.0,
            value=0.0,
            step=0.5,
            help="Negative values reduce discount (increase price)"
        )

        if discount_change != 0:
            # Slider is signed discount *change*; simulate_pricing_action takes
            # a discount *reduction*, hence the negation.
            sim = simulate_pricing_action(seg_hist, elasticity, -discount_change)

            # sim is None when the simulation failed; in that case nothing is
            # rendered for this run.
            if sim:
                col_res1, col_res2 = st.columns(2)

                with col_res1:
                    comparison_data = pd.DataFrame({
                        'Metric': ['Price', 'Volume', 'GM%'],
                        'Current': [sim['baseline_price'], sim['baseline_qty'], sim['gm0_pct']*100],
                        'Projected': [sim['new_price'], sim['new_qty'], sim['gm1_pct']*100]
                    })

                    fig_comp = go.Figure()
                    fig_comp.add_trace(go.Bar(
                        name='Current',
                        x=comparison_data['Metric'],
                        y=comparison_data['Current'],
                        marker_color='#94a3b8'
                    ))
                    fig_comp.add_trace(go.Bar(
                        name='Projected',
                        x=comparison_data['Metric'],
                        y=comparison_data['Projected'],
                        marker_color='#3b82f6'
                    ))

                    fig_comp.update_layout(
                        title="Current vs. Projected Performance",
                        barmode='group',
                        height=350
                    )
                    st.plotly_chart(fig_comp, use_container_width=True)

                with col_res2:
                    st.markdown("#### πŸ“ˆ Simulation Results")

                    gm_change = sim['gm1_pct'] - sim['gm0_pct']

                    st.metric(
                        "Gross Margin Impact",
                        f"{sim['gm1_pct']*100:.1f}%",
                        f"{gm_change*100:+.1f}pp"
                    )

                    st.metric(
                        "Revenue Impact",
                        f"${sim['new_price'] * sim['new_qty']:.2f}",
                        f"${sim['revenue_delta']:+.2f}"
                    )

                    vol_change = sim['new_qty'] - sim['baseline_qty']
                    st.metric(
                        "Volume Impact",
                        f"{sim['new_qty']:.0f} units",
                        f"{vol_change:+.0f} units"
                    )

                    # Green for positive P&L impact, red for negative.
                    st.markdown(f"""
                    <div class="insight-box" style="margin-top: 1rem;">
                    <b>πŸ’° Daily P&L Impact:</b><br>
                    <span style="font-size: 1.5rem; {'color: #28a745' if sim['gm_delta_value'] > 0 else 'color: #dc3545'}">
                    ${sim['gm_delta_value']:+.2f}
                    </span>
                    </div>
                    """, unsafe_allow_html=True)
        else:
            st.info("πŸ‘† Adjust the discount slider above to simulate different pricing strategies")
    else:
        st.warning("⚠️ Insufficient data for selected segment. Please choose a different combination.")
765
 
766
st.markdown("---")
# Footer disclaimer — this demo runs entirely on synthetic data.
st.markdown("""
<div style="text-align: center; color: #666; padding: 2rem 0;">
<small>
πŸ”’ Demo Mode: Using synthetic SAP-style data for illustration purposes
</small>
</div>
""", unsafe_allow_html=True)