Spaces:

SelmaNajih001
/

InvestmentStrategyBasedOnSentiment

Sleeping

App Files Files Community

SelmaNajih001 committed on Oct 9, 2025

Commit

46a0645

verified ·

1 Parent(s): e55274c

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -66

app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import pandas as pd
 import yfinance as yf
-from datasets import load_dataset
 from transformers import pipeline
 import plotly.graph_objects as go
 import gradio as gr
-# --- PARAMETRI ---
 HF_DATASET = "SelmaNajih001/FT_MultiCompany"
 MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
 MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
 MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
@@ -18,19 +18,38 @@ TICKERS = {
 }
 companies = list(TICKERS.keys())
-# --- PIPELINES ---
 sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
 price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
 price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
 finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)
-# --- LOAD DATASET ---
 df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
 df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
 df_multi['date_merge'] = df_multi['date'].dt.normalize()
 df_multi.sort_values('date', inplace=True)
-# --- SENTIMENT & PREDICTION ---
 df_multi['Sentiment'] = ""
 df_multi['Confidence'] = 0.0
 df_multi['Predicted'] = 0.0
@@ -69,7 +88,7 @@ for i, row in df_multi.iterrows():
     except:
         df_multi.at[i,'Predicted'] = 0.0
-# --- FETCH STOCK PRICES ---
 prices = {}
 for company, ticker in TICKERS.items():
     start_date = df_multi[df_multi['Company']==company]['date'].min()
@@ -80,7 +99,6 @@ for company, ticker in TICKERS.items():
     df_prices['PctChangeDaily'] = df_prices[f'Close_{ticker}'].pct_change().shift(-1)
     prices[company] = df_prices
-# --- MERGE & STRATEGIE ---
 dfs_final = {}
 for company in companies:
     df_c = df_multi[df_multi['Company'] == company].copy()
@@ -118,7 +136,6 @@ for company in companies:
         else:
             df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative']
     # Strategy C: FinBERT
     df_c['StrategyC_Cumulative'] = 0.0
     for i in range(1, len(df_c)):
@@ -132,18 +149,20 @@ for company in companies:
         else:
             df_c.loc[i,'StrategyC_Cumulative'] = df_c.loc[i-1,'StrategyC_Cumulative']
     dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
 # --- FUNZIONE PER GRADIO ---
 def show_company_data(selected_companies, aggregation="Day"):
     if not selected_companies:
-        return "Please select at least one company", None, None
     agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")
-    # --- Strategies Chart ---
     fig_strat = go.Figure()
     for c in selected_companies:
         if c not in dfs_final:
             continue
@@ -152,15 +171,18 @@ def show_company_data(selected_companies, aggregation="Day"):
         df_grouped = df_c.groupby(agg_col).agg({
             'StrategyA_Cumulative': 'last',
             'StrategyB_Cumulative': 'last',
-            'StrategyC_Cumulative': 'last'
         }).reset_index()
         strategy_labels = {
             'StrategyA_Cumulative': "Custom Sentiment",
             'StrategyB_Cumulative': "Regression",
             'StrategyC_Cumulative': "FinBERT"
         }
         for strat in ['StrategyA_Cumulative', 'StrategyB_Cumulative', 'StrategyC_Cumulative']:
             fig_strat.add_trace(go.Scatter(
                 x=df_grouped[agg_col],
@@ -169,6 +191,13 @@ def show_company_data(selected_companies, aggregation="Day"):
                 name=f"{c} - {strategy_labels[strat]}"
             ))
     fig_strat.update_layout(
         title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
         xaxis_title=aggregation,
@@ -177,35 +206,18 @@ def show_company_data(selected_companies, aggregation="Day"):
         hovermode="x unified"
     )
-    # --- Grafico Prezzi ---
-    fig_price = go.Figure()
-    for c in selected_companies:
-        if c not in dfs_final:
-            continue
-        df_c = dfs_final[c]
-        df_grouped = df_c.groupby(agg_col).agg({
-            f'Close_{TICKERS[c]}':'last'
-        }).reset_index()
-        fig_price.add_trace(go.Scatter(
-            x=df_grouped[agg_col], y=df_grouped[f'Close_{TICKERS[c]}'],
-            mode="lines", name=f"{c} Price"
-        ))
     fig_price.update_layout(
-        title="Andamento Prezzi",
         xaxis_title=aggregation,
-        yaxis_title="Stock Price",
         template="plotly_dark",
         hovermode="x unified"
     )
-    return None, fig_strat, fig_price
-import gradio as gr
-# --- Markdown descrittivo ---
 description_text = """
 ### Portfolio Strategy Comparison Dashboard
 This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
@@ -214,41 +226,14 @@ This dashboard allows you to compare the performance of three sentiment models i
   - If the score is below -0.8 → sell
   - Otherwise → no trade
   - For the regression model, thresholds are +1 and -1.
-- **Dataset and preprocessing**:
-  - Closing prices and daily percent changes are calculated for each company.
-  - News articles mentioning Microsoft or Tesla are merged with the price data.
-  - Negative/Down scores are multiplied by -1, Neutral scores set to 0.
-  - Daily strategy value = daily percent change × stock price.
-  - Cumulative value = sum of daily strategy values over time.
-- **Model comparison**:
-  - Regression is fine-tuned separately for Tesla and Microsoft.
-  - FinBERT is used as a baseline.
-  - The custom model incorporates actual stock movements and company-specific signals.
-- **Results overview**:
-  - Tesla: Regression often performs better, though some losses occur.
-  - Microsoft: Regression closely follows market trends; FinBERT is less accurate.
-  - Regression aligns better with real stock movements by interpreting news contextually.
-- **Caveats**:
-  - Multiple news per day may generate buy/sell signals that cancel each other.
-  - Strategy uses next-day price changes; no multi-day logic is applied.
-  - Simplified testing, but useful to compare model behavior.
 """
-# --- INPUT OPTIONS ---
 companies = ["Microsoft", "Tesla, Inc."]
-# --- FUNZIONE DI ESECUZIONE ---
-def show_strategy(selected_companies, aggregation):
-    # Qui chiami la funzione del tuo modello per generare i grafici e il dataframe
-    # Ad esempio: return df, fig_strategies, fig_prices
-    return None, None, None  # sostituire con output reali
-# --- INTERFACCIA GRADIO ---
 with gr.Blocks() as demo:
     gr.Markdown("# Portfolio Strategy Dashboard")
     gr.Markdown(description_text)
-    # Input centrati
     with gr.Row():
         dropdown_companies = gr.Dropdown(
             choices=companies,
@@ -261,16 +246,14 @@ with gr.Blocks() as demo:
             value="Day",
             label="Aggregation Level"
         )
-        submit_btn = gr.Button("Submit")  # bottone per inviare
-    # Output
     data_table = gr.Dataframe(label="Data Preview", type="pandas")
     strategies_plot = gr.Plot(label="Strategies")
     prices_plot = gr.Plot(label="Prices")
-    # Collega bottone agli output usando la funzione reale
     submit_btn.click(
-        fn=show_company_data,  # <--- qui prima avevi show_strategy
         inputs=[dropdown_companies, radio_aggregation],
         outputs=[data_table, strategies_plot, prices_plot]
     )

 import pandas as pd
 import yfinance as yf
+from datasets import Dataset, load_dataset
 from transformers import pipeline
 import plotly.graph_objects as go
 import gradio as gr
 HF_DATASET = "SelmaNajih001/FT_MultiCompany"
+HF_PRIVATE_DATASET = "tuo-username/portfolio_strategy_data"
 MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
 MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
 MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
 }
 companies = list(TICKERS.keys())
 sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
 price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
 price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
 finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)
 df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
 df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
 df_multi['date_merge'] = df_multi['date'].dt.normalize()
 df_multi.sort_values('date', inplace=True)
+try:
+    ds_existing = load_dataset(HF_PRIVATE_DATASET)["train"]
+    df_existing = pd.DataFrame(ds_existing)
+except:
+    df_existing = pd.DataFrame()
+if not df_existing.empty:
+    df_to_add = df_multi[~df_multi['Date'].isin(df_existing['Date'])]
+else:
+    df_to_add = df_multi.copy()
+if not df_existing.empty:
+    df_updated = pd.concat([df_existing, df_to_add], ignore_index=True)
+else:
+    df_updated = df_to_add.copy()
+hf_dataset_updated = Dataset.from_pandas(df_updated)
+hf_dataset_updated.push_to_hub(HF_PRIVATE_DATASET, private=True)
+print(f"Dataset aggiornato su Hugging Face: {HF_PRIVATE_DATASET}")
+df_multi = df_updated.copy()
 df_multi['Sentiment'] = ""
 df_multi['Confidence'] = 0.0
 df_multi['Predicted'] = 0.0
     except:
         df_multi.at[i,'Predicted'] = 0.0
 prices = {}
 for company, ticker in TICKERS.items():
     start_date = df_multi[df_multi['Company']==company]['date'].min()
     df_prices['PctChangeDaily'] = df_prices[f'Close_{ticker}'].pct_change().shift(-1)
     prices[company] = df_prices
 dfs_final = {}
 for company in companies:
     df_c = df_multi[df_multi['Company'] == company].copy()
         else:
             df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative']
     # Strategy C: FinBERT
     df_c['StrategyC_Cumulative'] = 0.0
     for i in range(1, len(df_c)):
         else:
             df_c.loc[i,'StrategyC_Cumulative'] = df_c.loc[i-1,'StrategyC_Cumulative']
     dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
 # --- FUNZIONE PER GRADIO ---
 def show_company_data(selected_companies, aggregation="Day"):
     if not selected_companies:
+        return pd.DataFrame(), None, None
     agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")
+    # --- Figures ---
     fig_strat = go.Figure()
+    fig_price = go.Figure()
+    dfs_display = []
     for c in selected_companies:
         if c not in dfs_final:
             continue
         df_grouped = df_c.groupby(agg_col).agg({
             'StrategyA_Cumulative': 'last',
             'StrategyB_Cumulative': 'last',
+            'StrategyC_Cumulative': 'last',
+            f'Close_{TICKERS[c]}': 'last'
         }).reset_index()
+        df_grouped['Company'] = c
+        dfs_display.append(df_grouped)
         strategy_labels = {
             'StrategyA_Cumulative': "Custom Sentiment",
             'StrategyB_Cumulative': "Regression",
             'StrategyC_Cumulative': "FinBERT"
         }
         for strat in ['StrategyA_Cumulative', 'StrategyB_Cumulative', 'StrategyC_Cumulative']:
             fig_strat.add_trace(go.Scatter(
                 x=df_grouped[agg_col],
                 name=f"{c} - {strategy_labels[strat]}"
             ))
+        fig_price.add_trace(go.Scatter(
+            x=df_grouped[agg_col],
+            y=df_grouped[f'Close_{TICKERS[c]}'],
+            mode="lines",
+            name=f"{c} Price"
+        ))
     fig_strat.update_layout(
         title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
         xaxis_title=aggregation,
         hovermode="x unified"
     )
     fig_price.update_layout(
+        title="Stock Prices",
         xaxis_title=aggregation,
+        yaxis_title="Price",
         template="plotly_dark",
         hovermode="x unified"
     )
+    df_display = pd.concat(dfs_display, ignore_index=True) if dfs_display else pd.DataFrame()
+    return df_display, fig_strat, fig_price
+# --- INTERFACCIA GRADIO ---
 description_text = """
 ### Portfolio Strategy Comparison Dashboard
 This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
   - If the score is below -0.8 → sell
   - Otherwise → no trade
   - For the regression model, thresholds are +1 and -1.
 """
 companies = ["Microsoft", "Tesla, Inc."]
 with gr.Blocks() as demo:
     gr.Markdown("# Portfolio Strategy Dashboard")
     gr.Markdown(description_text)
     with gr.Row():
         dropdown_companies = gr.Dropdown(
             choices=companies,
             value="Day",
             label="Aggregation Level"
         )
+        submit_btn = gr.Button("Submit")
     data_table = gr.Dataframe(label="Data Preview", type="pandas")
     strategies_plot = gr.Plot(label="Strategies")
     prices_plot = gr.Plot(label="Prices")
     submit_btn.click(
+        fn=show_company_data,
         inputs=[dropdown_companies, radio_aggregation],
         outputs=[data_table, strategies_plot, prices_plot]
     )