PD03 committed
Commit a85e11a · verified · 1 Parent(s): 4a5ff6e

Update app.py

Files changed (1)
  1. app.py +161 -22
app.py CHANGED
@@ -50,6 +50,14 @@ st.markdown("""
         border-radius: 8px;
         box-shadow: 0 4px 8px rgba(0,0,0,0.05);
     }
+    .warning-box {
+        background: #fff3cd;
+        border-left: 5px solid #ffc107;
+        padding: 1.5rem;
+        margin: 1rem 0;
+        border-radius: 8px;
+        box-shadow: 0 4px 8px rgba(0,0,0,0.05);
+    }
     .recommendation-card {
         background: white;
         border: 2px solid #e9ecef;
@@ -166,16 +174,19 @@ def generate_synthetic_data(days=60, seed=42, rows_per_day=600):
 @st.cache_data(show_spinner=False)
 def build_features(_df):
     df = _df.copy()
+    # FIXED: Remove duplicate features - keep only one of unit_cost/cost_per_unit and net_price/price_per_unit
     feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
     feats_cat = ["product", "region", "channel"]
     df = df.sort_values("date").copy()
     seg = ["product", "region", "channel"]
-    df["price_per_unit"] = df["net_price"]
-    df["cost_per_unit"] = df["unit_cost"]
+
+    # Add rolling features
     df["roll7_qty"] = df.groupby(seg)["qty"].transform(lambda s: s.rolling(7, min_periods=1).median())
-    df["roll7_price"] = df.groupby(seg)["price_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
-    df["roll7_cost"] = df.groupby(seg)["cost_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
-    feats_num += ["price_per_unit", "cost_per_unit", "roll7_qty", "roll7_price", "roll7_cost"]
+    df["roll7_price"] = df.groupby(seg)["net_price"].transform(lambda s: s.rolling(7, min_periods=1).median())
+    df["roll7_cost"] = df.groupby(seg)["unit_cost"].transform(lambda s: s.rolling(7, min_periods=1).median())
+
+    # Add these to numeric features
+    feats_num += ["roll7_qty", "roll7_price", "roll7_cost"]
     target = "gm_pct"
     return df, feats_num, feats_cat, target
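For reference, a minimal self-contained sketch (toy data, not part of this commit) of what the grouped 7-day rolling median in build_features produces:

    import pandas as pd

    toy = pd.DataFrame({
        "product": ["A"] * 4, "region": ["EU"] * 4, "channel": ["Web"] * 4,
        "qty": [10, 30, 20, 40],
    })
    seg = ["product", "region", "channel"]
    # Same pattern as the diff: per-segment rolling median over up to 7 rows
    toy["roll7_qty"] = toy.groupby(seg)["qty"].transform(lambda s: s.rolling(7, min_periods=1).median())
    print(toy["roll7_qty"].tolist())  # [10.0, 20.0, 20.0, 25.0]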
 
@@ -200,33 +211,31 @@ def train_model(feats_num, feats_cat, target, _X, _y):
 def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=500):
     try:
         np.random.seed(42)
-        # Get sample
         X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
 
         if len(X_sample) > shap_sample:
             sample_idx = np.random.choice(len(X_sample), size=shap_sample, replace=False)
             X_sample = X_sample.iloc[sample_idx]
 
-        # Transform data
         X_t = _pipe.named_steps["pre"].transform(X_sample)
         if hasattr(X_t, 'toarray'):
             X_t = X_t.toarray()
 
-        # Get feature names
         cat_features = list(_pipe.named_steps["pre"].named_transformers_["cat"].get_feature_names_out(feats_cat))
         feature_names = cat_features + feats_num
 
-        # Compute SHAP
         explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
         shap_values = explainer.shap_values(X_t)
 
-        # Create DataFrame
+        # ADDED: Compute SHAP interaction values for deeper insights
+        shap_interaction_values = explainer.shap_interaction_values(X_t)
+
         shap_df = pd.DataFrame(shap_values, columns=feature_names)
 
-        return shap_df, X_sample.reset_index(drop=True), feature_names
+        return shap_df, shap_interaction_values, X_sample.reset_index(drop=True), feature_names
     except Exception as e:
         st.error(f"Error computing SHAP: {str(e)}")
-        return None, None, None
+        return None, None, None, None
 
 def estimate_segment_elasticity(df, product, region, channel):
     seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
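For reference, a minimal self-contained sketch (synthetic data and a toy regressor, not the app's pipeline) of what the new shap_interaction_values call returns for a single-output tree model: one n_features × n_features matrix per sample, with main effects on the diagonal and pairwise interactions split across the off-diagonal cells.

    import numpy as np
    import shap
    from sklearn.ensemble import RandomForestRegressor

    X = np.random.RandomState(0).rand(50, 4)
    y = X[:, 0] * X[:, 1] + X[:, 2]
    rf = RandomForestRegressor(n_estimators=20, random_state=0).fit(X, y)

    explainer = shap.TreeExplainer(rf)
    inter = explainer.shap_interaction_values(X)  # shape: (50, 4, 4)
    main = explainer.shap_values(X)               # shape: (50, 4)
    # Summing a sample's interaction matrix recovers its total SHAP attribution (up to floating point).
    print(inter.shape, inter[0].sum(), main[0].sum())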
@@ -280,11 +289,81 @@ def simulate_pricing_action(segment_df, elasticity, discount_reduction_pct):
     except:
         return None
 
+def analyze_shap_interactions(shap_interaction_values, X_sample, feature_names, feats_cat):
+    """Analyze SHAP interaction effects to find problematic combinations"""
+    try:
+        # Get categorical feature indices in the transformed feature space
+        cat_feature_indices = {}
+        for cat in feats_cat:
+            cat_feature_indices[cat] = [i for i, fname in enumerate(feature_names) if fname.startswith(f"cat__{cat}_")]
+
+        # Find strongest interactions for each sample
+        interaction_insights = []
+
+        for sample_idx in range(min(100, len(X_sample))):  # Analyze first 100 samples
+            interaction_matrix = shap_interaction_values[sample_idx]
+
+            # Get the sample's categorical values
+            sample_product = X_sample.iloc[sample_idx]["product"]
+            sample_region = X_sample.iloc[sample_idx]["region"]
+            sample_channel = X_sample.iloc[sample_idx]["channel"]
+
+            # Find product feature index
+            prod_idx = [i for i, fname in enumerate(feature_names) if f"product_{sample_product}" in fname]
+            reg_idx = [i for i, fname in enumerate(feature_names) if f"region_{sample_region}" in fname]
+            chan_idx = [i for i, fname in enumerate(feature_names) if f"channel_{sample_channel}" in fname]
+
+            if prod_idx and reg_idx:
+                prod_reg_interaction = interaction_matrix[prod_idx[0], reg_idx[0]]
+                if abs(prod_reg_interaction) > 0.001:
+                    interaction_insights.append({
+                        "Product": sample_product,
+                        "Region": sample_region,
+                        "Channel": sample_channel,
+                        "Interaction_Type": "Product × Region",
+                        "Interaction_Effect": prod_reg_interaction
+                    })
+
+            if prod_idx and chan_idx:
+                prod_chan_interaction = interaction_matrix[prod_idx[0], chan_idx[0]]
+                if abs(prod_chan_interaction) > 0.001:
+                    interaction_insights.append({
+                        "Product": sample_product,
+                        "Region": sample_region,
+                        "Channel": sample_channel,
+                        "Interaction_Type": "Product × Channel",
+                        "Interaction_Effect": prod_chan_interaction
+                    })
+
+            if reg_idx and chan_idx:
+                reg_chan_interaction = interaction_matrix[reg_idx[0], chan_idx[0]]
+                if abs(reg_chan_interaction) > 0.001:
+                    interaction_insights.append({
+                        "Product": sample_product,
+                        "Region": sample_region,
+                        "Channel": sample_channel,
+                        "Interaction_Type": "Region × Channel",
+                        "Interaction_Effect": reg_chan_interaction
+                    })
+
+        if interaction_insights:
+            interactions_df = pd.DataFrame(interaction_insights)
+            # Aggregate by combination
+            agg_interactions = interactions_df.groupby(["Product", "Region", "Channel", "Interaction_Type"]).agg({
+                "Interaction_Effect": "mean"
+            }).reset_index()
+            agg_interactions = agg_interactions.sort_values("Interaction_Effect")
+            return agg_interactions
+        else:
+            return pd.DataFrame()
+    except Exception as e:
+        st.warning(f"Could not compute interaction effects: {str(e)}")
+        return pd.DataFrame()
+
 # -----------------------------
 # Main App
 # -----------------------------
 
-# Header
 st.markdown('<h1 class="main-header">🎯 Profitability Intelligence Suite</h1>', unsafe_allow_html=True)
 st.markdown('<p class="sub-header">AI-Powered Margin Analysis & Strategic Recommendations</p>', unsafe_allow_html=True)
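For reference, analyze_shap_interactions returns one row per (Product, Region, Channel, Interaction_Type) with the mean signed effect, sorted ascending so margin-reducing combinations come first. A toy illustration of that shape (values invented):

    import pandas as pd

    example = pd.DataFrame({
        "Product": ["Premium Widget", "Basic Widget"],
        "Region": ["EMEA", "APAC"],
        "Channel": ["E-Commerce", "Retail"],
        "Interaction_Type": ["Product × Region", "Region × Channel"],
        "Interaction_Effect": [-0.0075, 0.0031],
    })
    print(example.sort_values("Interaction_Effect").head(1))  # most margin-damaging combination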
 
@@ -406,8 +485,8 @@ with st.spinner("🤖 Training AI model..."):
     st.success(f"✅ Model trained: R² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")
 
 # Compute SHAP once for all tabs
-with st.spinner("🔬 Analyzing profitability drivers..."):
-    shap_df, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=500)
+with st.spinner("🔬 Analyzing profitability drivers and interactions..."):
+    shap_df, shap_interaction_values, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=500)
 
 # Tabs for different sections
 tab1, tab2, tab3 = st.tabs(["🔍 Key Drivers Analysis", "🎯 Strategic Recommendations", "🧪 What-If Simulator"])
@@ -416,8 +495,8 @@ with tab1:
     st.markdown("### Understanding What Drives Your Profitability")
     st.markdown("""
     <div class="insight-box">
-    <b>🎓 Business Insight:</b> This analysis reveals which business factors have the strongest impact on gross margin.
-    Understanding these drivers helps prioritize strategic initiatives and operational improvements.
+    <b>🎓 Business Insight:</b> This analysis reveals which business factors have the strongest impact on gross margin,
+    including complex interactions between product, region, and channel combinations.
     </div>
     """, unsafe_allow_html=True)
 
@@ -432,8 +511,6 @@ with tab1:
             "net_price": "Net Selling Price",
             "list_price": "List Price",
             "qty": "Order Quantity",
-            "price_per_unit": "Price per Unit",
-            "cost_per_unit": "Cost per Unit",
             "roll7_qty": "7-Day Avg Quantity",
             "roll7_price": "7-Day Avg Price",
             "roll7_cost": "7-Day Avg Cost",
@@ -445,7 +522,7 @@ with tab1:
         for feat, val in mean_abs.head(10).items():
             bus_name = feat
             for key, name in business_name_map.items():
-                if key in feat.lower():
+                if key == feat:
                     bus_name = name
                     break
             if feat.startswith("cat__"):
@@ -514,9 +591,8 @@ with tab1:
         </div>
         """, unsafe_allow_html=True)
 
-        st.markdown("#### Segment Performance")
+        st.markdown("#### Single-Feature Segment Impact")
 
-        # Safely join SHAP with original data
         try:
             cat_cols = ["product", "region", "channel"]
             joined = pd.concat([X_test_sample[cat_cols].reset_index(drop=True),
@@ -538,6 +614,69 @@ with tab1:
                 st.markdown(f"• **{row['product']}** • {row['region']} • {row['channel']} *(Impact: {row['net_impact']:.4f})*")
         except Exception as e:
             st.warning(f"Unable to compute segment analysis: {str(e)}")
+
+        # NEW: SHAP Interaction Analysis
+        st.markdown("---")
+        st.markdown("### 🔗 Interaction Effects Analysis")
+        st.markdown("""
+        <div class="warning-box">
+        <b>⚡ Advanced Insight:</b> These combinations show how features interact to create compound effects on profitability.
+        For example, "Premium Widget in EMEA via E-Commerce" may have a different margin profile than individual factors suggest.
+        </div>
+        """, unsafe_allow_html=True)
+
+        if shap_interaction_values is not None:
+            interactions_df = analyze_shap_interactions(shap_interaction_values, X_test_sample, feature_names, feats_cat)
+
+            if not interactions_df.empty:
+                col_int1, col_int2 = st.columns(2)
+
+                with col_int1:
+                    st.markdown("#### ⚠️ Problematic Combinations (Reducing Margin)")
+                    worst_interactions = interactions_df.nsmallest(10, "Interaction_Effect")
+
+                    for idx, row in worst_interactions.head(5).iterrows():
+                        st.markdown(f"""
+                        <div class="recommendation-card" style="border-left: 4px solid #dc3545;">
+                        <b>{row['Interaction_Type']}</b><br>
+                        {row['Product']} • {row['Region']} • {row['Channel']}<br>
+                        <span style="color: #dc3545; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
+                        </div>
+                        """, unsafe_allow_html=True)
+
+                with col_int2:
+                    st.markdown("#### ✅ High-Performing Combinations (Boosting Margin)")
+                    best_interactions = interactions_df.nlargest(10, "Interaction_Effect")
+
+                    for idx, row in best_interactions.head(5).iterrows():
+                        st.markdown(f"""
+                        <div class="recommendation-card" style="border-left: 4px solid #28a745;">
+                        <b>{row['Interaction_Type']}</b><br>
+                        {row['Product']} • {row['Region']} • {row['Channel']}<br>
+                        <span style="color: #28a745; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
+                        </div>
+                        """, unsafe_allow_html=True)
+
+                # Visualization of interaction effects
+                st.markdown("#### Interaction Effects Heatmap")
+
+                # Create summary by combination
+                interaction_summary = interactions_df.groupby(["Product", "Region", "Channel"]).agg({
+                    "Interaction_Effect": "sum"
+                }).reset_index()
+
+                fig_int = px.treemap(
+                    interaction_summary,
+                    path=['Product', 'Region', 'Channel'],
+                    values=interaction_summary['Interaction_Effect'].abs(),
+                    color='Interaction_Effect',
+                    color_continuous_scale='RdYlGn',
+                    title="Interaction Effects by Product-Region-Channel Combinations"
+                )
+                fig_int.update_layout(height=500)
+                st.plotly_chart(fig_int, use_container_width=True)
+            else:
+                st.info("No significant interaction effects detected in the current sample.")
     else:
         st.error("Unable to compute driver analysis. Please check your data.")
 
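One design note on the treemap above: px.treemap needs non-negative sizes, which is presumably why the tiles are sized by the absolute interaction effect while color keeps the signed value. A minimal self-contained sketch (toy numbers, not from the app):

    import pandas as pd
    import plotly.express as px

    toy = pd.DataFrame({
        "Product": ["Widget", "Widget", "Gadget"],
        "Region": ["EMEA", "APAC", "EMEA"],
        "Channel": ["Online", "Retail", "Online"],
        "Interaction_Effect": [-0.012, 0.004, 0.009],
    })
    # Size by magnitude, color by signed effect, mirroring the chart in the diff
    fig = px.treemap(toy, path=["Product", "Region", "Channel"],
                     values=toy["Interaction_Effect"].abs(),
                     color="Interaction_Effect", color_continuous_scale="RdYlGn")
    fig.show()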
 
 