PD03 committed
Commit a85e11a · verified · 1 Parent(s): 4a5ff6e

Update app.py

Files changed (1)
  1. app.py +161 -22
app.py CHANGED
@@ -50,6 +50,14 @@ st.markdown("""
         border-radius: 8px;
         box-shadow: 0 4px 8px rgba(0,0,0,0.05);
     }
+    .warning-box {
+        background: #fff3cd;
+        border-left: 5px solid #ffc107;
+        padding: 1.5rem;
+        margin: 1rem 0;
+        border-radius: 8px;
+        box-shadow: 0 4px 8px rgba(0,0,0,0.05);
+    }
     .recommendation-card {
         background: white;
         border: 2px solid #e9ecef;
@@ -166,16 +174,19 @@ def generate_synthetic_data(days=60, seed=42, rows_per_day=600):
 @st.cache_data(show_spinner=False)
 def build_features(_df):
     df = _df.copy()
+    # FIXED: Remove duplicate features - keep only one of unit_cost/cost_per_unit and net_price/price_per_unit
     feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
     feats_cat = ["product", "region", "channel"]
     df = df.sort_values("date").copy()
     seg = ["product", "region", "channel"]
-    df["price_per_unit"] = df["net_price"]
-    df["cost_per_unit"] = df["unit_cost"]
+
+    # Add rolling features
     df["roll7_qty"] = df.groupby(seg)["qty"].transform(lambda s: s.rolling(7, min_periods=1).median())
-    df["roll7_price"] = df.groupby(seg)["price_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
-    df["roll7_cost"] = df.groupby(seg)["cost_per_unit"].transform(lambda s: s.rolling(7, min_periods=1).median())
-    feats_num += ["price_per_unit", "cost_per_unit", "roll7_qty", "roll7_price", "roll7_cost"]
+    df["roll7_price"] = df.groupby(seg)["net_price"].transform(lambda s: s.rolling(7, min_periods=1).median())
+    df["roll7_cost"] = df.groupby(seg)["unit_cost"].transform(lambda s: s.rolling(7, min_periods=1).median())
+
+    # Add these to numeric features
+    feats_num += ["roll7_qty", "roll7_price", "roll7_cost"]
     target = "gm_pct"
     return df, feats_num, feats_cat, target
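For reference, a minimal self-contained sketch (toy data, not part of this commit) of what the grouped 7-day rolling median in build_features produces:

    import pandas as pd

    toy = pd.DataFrame({
        "product": ["A"] * 4, "region": ["EU"] * 4, "channel": ["Web"] * 4,
        "qty": [10, 30, 20, 40],
    })
    seg = ["product", "region", "channel"]
    # Same pattern as the diff: per-segment rolling median over up to 7 rows
    toy["roll7_qty"] = toy.groupby(seg)["qty"].transform(lambda s: s.rolling(7, min_periods=1).median())
    print(toy["roll7_qty"].tolist())  # [10.0, 20.0, 20.0, 25.0]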
 
@@ -200,33 +211,31 @@ def train_model(feats_num, feats_cat, target, _X, _y):
 def compute_shap_values(_pipe, _X_sample, feats_num, feats_cat, shap_sample=500):
     try:
         np.random.seed(42)
-        # Get sample
         X_sample = _X_sample.copy() if hasattr(_X_sample, 'copy') else pd.DataFrame(_X_sample)
 
         if len(X_sample) > shap_sample:
             sample_idx = np.random.choice(len(X_sample), size=shap_sample, replace=False)
             X_sample = X_sample.iloc[sample_idx]
 
-        # Transform data
         X_t = _pipe.named_steps["pre"].transform(X_sample)
         if hasattr(X_t, 'toarray'):
             X_t = X_t.toarray()
 
-        # Get feature names
         cat_features = list(_pipe.named_steps["pre"].named_transformers_["cat"].get_feature_names_out(feats_cat))
         feature_names = cat_features + feats_num
 
-        # Compute SHAP
         explainer = shap.TreeExplainer(_pipe.named_steps["rf"])
         shap_values = explainer.shap_values(X_t)
 
-        # Create DataFrame
+        # ADDED: Compute SHAP interaction values for deeper insights
+        shap_interaction_values = explainer.shap_interaction_values(X_t)
+
         shap_df = pd.DataFrame(shap_values, columns=feature_names)
 
-        return shap_df, X_sample.reset_index(drop=True), feature_names
+        return shap_df, shap_interaction_values, X_sample.reset_index(drop=True), feature_names
     except Exception as e:
         st.error(f"Error computing SHAP: {str(e)}")
-        return None, None, None
+        return None, None, None, None
 
 def estimate_segment_elasticity(df, product, region, channel):
     seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
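For reference, a minimal self-contained sketch (synthetic data and a toy regressor, not the app's pipeline) of what the new shap_interaction_values call returns for a single-output tree model: one n_features × n_features matrix per sample, with main effects on the diagonal and pairwise interactions split across the off-diagonal cells.

    import numpy as np
    import shap
    from sklearn.ensemble import RandomForestRegressor

    X = np.random.RandomState(0).rand(50, 4)
    y = X[:, 0] * X[:, 1] + X[:, 2]
    rf = RandomForestRegressor(n_estimators=20, random_state=0).fit(X, y)

    explainer = shap.TreeExplainer(rf)
    inter = explainer.shap_interaction_values(X)  # shape: (50, 4, 4)
    main = explainer.shap_values(X)               # shape: (50, 4)
    # Summing a sample's interaction matrix recovers its total SHAP attribution (up to floating point).
    print(inter.shape, inter[0].sum(), main[0].sum())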
@@ -280,11 +289,81 @@ def simulate_pricing_action(segment_df, elasticity, discount_reduction_pct):
     except:
         return None
 
+def analyze_shap_interactions(shap_interaction_values, X_sample, feature_names, feats_cat):
+    """Analyze SHAP interaction effects to find problematic combinations"""
+    try:
+        # Get categorical feature indices in the transformed feature space
+        cat_feature_indices = {}
+        for cat in feats_cat:
+            cat_feature_indices[cat] = [i for i, fname in enumerate(feature_names) if fname.startswith(f"cat__{cat}_")]
+
+        # Find strongest interactions for each sample
+        interaction_insights = []
+
+        for sample_idx in range(min(100, len(X_sample))):  # Analyze first 100 samples
+            interaction_matrix = shap_interaction_values[sample_idx]
+
+            # Get the sample's categorical values
+            sample_product = X_sample.iloc[sample_idx]["product"]
+            sample_region = X_sample.iloc[sample_idx]["region"]
+            sample_channel = X_sample.iloc[sample_idx]["channel"]
+
+            # Find product feature index
+            prod_idx = [i for i, fname in enumerate(feature_names) if f"product_{sample_product}" in fname]
+            reg_idx = [i for i, fname in enumerate(feature_names) if f"region_{sample_region}" in fname]
+            chan_idx = [i for i, fname in enumerate(feature_names) if f"channel_{sample_channel}" in fname]
+
+            if prod_idx and reg_idx:
+                prod_reg_interaction = interaction_matrix[prod_idx[0], reg_idx[0]]
+                if abs(prod_reg_interaction) > 0.001:
+                    interaction_insights.append({
+                        "Product": sample_product,
+                        "Region": sample_region,
+                        "Channel": sample_channel,
+                        "Interaction_Type": "Product × Region",
+                        "Interaction_Effect": prod_reg_interaction
+                    })
+
+            if prod_idx and chan_idx:
+                prod_chan_interaction = interaction_matrix[prod_idx[0], chan_idx[0]]
+                if abs(prod_chan_interaction) > 0.001:
+                    interaction_insights.append({
+                        "Product": sample_product,
+                        "Region": sample_region,
+                        "Channel": sample_channel,
+                        "Interaction_Type": "Product × Channel",
+                        "Interaction_Effect": prod_chan_interaction
+                    })
+
+            if reg_idx and chan_idx:
+                reg_chan_interaction = interaction_matrix[reg_idx[0], chan_idx[0]]
+                if abs(reg_chan_interaction) > 0.001:
+                    interaction_insights.append({
+                        "Product": sample_product,
+                        "Region": sample_region,
+                        "Channel": sample_channel,
+                        "Interaction_Type": "Region × Channel",
+                        "Interaction_Effect": reg_chan_interaction
+                    })
+
+        if interaction_insights:
+            interactions_df = pd.DataFrame(interaction_insights)
+            # Aggregate by combination
+            agg_interactions = interactions_df.groupby(["Product", "Region", "Channel", "Interaction_Type"]).agg({
+                "Interaction_Effect": "mean"
+            }).reset_index()
+            agg_interactions = agg_interactions.sort_values("Interaction_Effect")
+            return agg_interactions
+        else:
+            return pd.DataFrame()
+    except Exception as e:
+        st.warning(f"Could not compute interaction effects: {str(e)}")
+        return pd.DataFrame()
+
 # -----------------------------
 # Main App
 # -----------------------------
 
-# Header
 st.markdown('<h1 class="main-header">🎯 Profitability Intelligence Suite</h1>', unsafe_allow_html=True)
 st.markdown('<p class="sub-header">AI-Powered Margin Analysis & Strategic Recommendations</p>', unsafe_allow_html=True)
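For reference, analyze_shap_interactions returns one row per (Product, Region, Channel, Interaction_Type) with the mean signed effect, sorted ascending so margin-reducing combinations come first. A toy illustration of that shape (values invented):

    import pandas as pd

    example = pd.DataFrame({
        "Product": ["Premium Widget", "Basic Widget"],
        "Region": ["EMEA", "APAC"],
        "Channel": ["E-Commerce", "Retail"],
        "Interaction_Type": ["Product × Region", "Region × Channel"],
        "Interaction_Effect": [-0.0075, 0.0031],
    })
    print(example.sort_values("Interaction_Effect").head(1))  # most margin-damaging combination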
 
@@ -406,8 +485,8 @@ with st.spinner("🤖 Training AI model..."):
     st.success(f"✅ Model trained: R² = {metrics['r2']:.3f}, MAE = {metrics['mae']:.4f}")
 
 # Compute SHAP once for all tabs
-with st.spinner("🔬 Analyzing profitability drivers..."):
-    shap_df, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=500)
+with st.spinner("🔬 Analyzing profitability drivers and interactions..."):
+    shap_df, shap_interaction_values, X_test_sample, feature_names = compute_shap_values(pipe, X_test, feats_num, feats_cat, shap_sample=500)
 
 # Tabs for different sections
 tab1, tab2, tab3 = st.tabs(["🔍 Key Drivers Analysis", "🎯 Strategic Recommendations", "🧪 What-If Simulator"])
@@ -416,8 +495,8 @@ with tab1:
     st.markdown("### Understanding What Drives Your Profitability")
     st.markdown("""
     <div class="insight-box">
-    <b>🎓 Business Insight:</b> This analysis reveals which business factors have the strongest impact on gross margin.
-    Understanding these drivers helps prioritize strategic initiatives and operational improvements.
+    <b>🎓 Business Insight:</b> This analysis reveals which business factors have the strongest impact on gross margin,
+    including complex interactions between product, region, and channel combinations.
     </div>
     """, unsafe_allow_html=True)
 
@@ -432,8 +511,6 @@ with tab1:
             "net_price": "Net Selling Price",
             "list_price": "List Price",
             "qty": "Order Quantity",
-            "price_per_unit": "Price per Unit",
-            "cost_per_unit": "Cost per Unit",
             "roll7_qty": "7-Day Avg Quantity",
             "roll7_price": "7-Day Avg Price",
             "roll7_cost": "7-Day Avg Cost",
@@ -445,7 +522,7 @@ with tab1:
         for feat, val in mean_abs.head(10).items():
             bus_name = feat
             for key, name in business_name_map.items():
-                if key in feat.lower():
+                if key == feat:
                     bus_name = name
                     break
             if feat.startswith("cat__"):
@@ -514,9 +591,8 @@ with tab1:
         </div>
         """, unsafe_allow_html=True)
 
-        st.markdown("#### Segment Performance")
+        st.markdown("#### Single-Feature Segment Impact")
 
-        # Safely join SHAP with original data
         try:
             cat_cols = ["product", "region", "channel"]
             joined = pd.concat([X_test_sample[cat_cols].reset_index(drop=True),
@@ -538,6 +614,69 @@ with tab1:
                 st.markdown(f"• **{row['product']}** • {row['region']} • {row['channel']} *(Impact: {row['net_impact']:.4f})*")
         except Exception as e:
             st.warning(f"Unable to compute segment analysis: {str(e)}")
+
+        # NEW: SHAP Interaction Analysis
+        st.markdown("---")
+        st.markdown("### 🔗 Interaction Effects Analysis")
+        st.markdown("""
+        <div class="warning-box">
+        <b>⚡ Advanced Insight:</b> These combinations show how features interact to create compound effects on profitability.
+        For example, "Premium Widget in EMEA via E-Commerce" may have a different margin profile than individual factors suggest.
+        </div>
+        """, unsafe_allow_html=True)
+
+        if shap_interaction_values is not None:
+            interactions_df = analyze_shap_interactions(shap_interaction_values, X_test_sample, feature_names, feats_cat)
+
+            if not interactions_df.empty:
+                col_int1, col_int2 = st.columns(2)
+
+                with col_int1:
+                    st.markdown("#### ⚠️ Problematic Combinations (Reducing Margin)")
+                    worst_interactions = interactions_df.nsmallest(10, "Interaction_Effect")
+
+                    for idx, row in worst_interactions.head(5).iterrows():
+                        st.markdown(f"""
+                        <div class="recommendation-card" style="border-left: 4px solid #dc3545;">
+                        <b>{row['Interaction_Type']}</b><br>
+                        {row['Product']} • {row['Region']} • {row['Channel']}<br>
+                        <span style="color: #dc3545; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
+                        </div>
+                        """, unsafe_allow_html=True)
+
+                with col_int2:
+                    st.markdown("#### ✅ High-Performing Combinations (Boosting Margin)")
+                    best_interactions = interactions_df.nlargest(10, "Interaction_Effect")
+
+                    for idx, row in best_interactions.head(5).iterrows():
+                        st.markdown(f"""
+                        <div class="recommendation-card" style="border-left: 4px solid #28a745;">
+                        <b>{row['Interaction_Type']}</b><br>
+                        {row['Product']} • {row['Region']} • {row['Channel']}<br>
+                        <span style="color: #28a745; font-size: 1.2rem;">Effect: {row['Interaction_Effect']:.4f}</span>
+                        </div>
+                        """, unsafe_allow_html=True)
+
+                # Visualization of interaction effects
+                st.markdown("#### Interaction Effects Heatmap")
+
+                # Create summary by combination
+                interaction_summary = interactions_df.groupby(["Product", "Region", "Channel"]).agg({
+                    "Interaction_Effect": "sum"
+                }).reset_index()
+
+                fig_int = px.treemap(
+                    interaction_summary,
+                    path=['Product', 'Region', 'Channel'],
+                    values=interaction_summary['Interaction_Effect'].abs(),
+                    color='Interaction_Effect',
+                    color_continuous_scale='RdYlGn',
+                    title="Interaction Effects by Product-Region-Channel Combinations"
+                )
+                fig_int.update_layout(height=500)
+                st.plotly_chart(fig_int, use_container_width=True)
+            else:
+                st.info("No significant interaction effects detected in the current sample.")
     else:
         st.error("Unable to compute driver analysis. Please check your data.")
 
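One design note on the treemap above: px.treemap needs non-negative sizes, which is presumably why the tiles are sized by the absolute interaction effect while color keeps the signed value. A minimal self-contained sketch (toy numbers, not from the app):

    import pandas as pd
    import plotly.express as px

    toy = pd.DataFrame({
        "Product": ["Widget", "Widget", "Gadget"],
        "Region": ["EMEA", "APAC", "EMEA"],
        "Channel": ["Online", "Retail", "Online"],
        "Interaction_Effect": [-0.012, 0.004, 0.009],
    })
    # Size by magnitude, color by signed effect, mirroring the chart in the diff
    fig = px.treemap(toy, path=["Product", "Region", "Channel"],
                     values=toy["Interaction_Effect"].abs(),
                     color="Interaction_Effect", color_continuous_scale="RdYlGn")
    fig.show()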
 
 