Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import yfinance as yf
|
| 3 |
-
from datasets import load_dataset
|
| 4 |
from transformers import pipeline
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
import gradio as gr
|
| 7 |
|
| 8 |
-
# --- PARAMETERS ---
|
| 9 |
HF_DATASET = "SelmaNajih001/FT_MultiCompany"
|
|
|
|
| 10 |
MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
|
| 11 |
MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
|
| 12 |
MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
|
|
@@ -18,19 +18,38 @@ TICKERS = {
|
|
| 18 |
}
|
| 19 |
companies = list(TICKERS.keys())
|
| 20 |
|
| 21 |
-
# --- PIPELINES ---
|
| 22 |
sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
|
| 23 |
price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
|
| 24 |
price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
|
| 25 |
finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)
|
| 26 |
|
| 27 |
-
# --- LOAD DATASET ---
|
| 28 |
df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
|
| 29 |
df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
|
| 30 |
df_multi['date_merge'] = df_multi['date'].dt.normalize()
|
| 31 |
df_multi.sort_values('date', inplace=True)
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
df_multi['Sentiment'] = ""
|
| 35 |
df_multi['Confidence'] = 0.0
|
| 36 |
df_multi['Predicted'] = 0.0
|
|
@@ -69,7 +88,7 @@ for i, row in df_multi.iterrows():
|
|
| 69 |
except:
|
| 70 |
df_multi.at[i,'Predicted'] = 0.0
|
| 71 |
|
| 72 |
-
|
| 73 |
prices = {}
|
| 74 |
for company, ticker in TICKERS.items():
|
| 75 |
start_date = df_multi[df_multi['Company']==company]['date'].min()
|
|
@@ -80,7 +99,6 @@ for company, ticker in TICKERS.items():
|
|
| 80 |
df_prices['PctChangeDaily'] = df_prices[f'Close_{ticker}'].pct_change().shift(-1)
|
| 81 |
prices[company] = df_prices
|
| 82 |
|
| 83 |
-
# --- MERGE & STRATEGIES ---
|
| 84 |
dfs_final = {}
|
| 85 |
for company in companies:
|
| 86 |
df_c = df_multi[df_multi['Company'] == company].copy()
|
|
@@ -118,7 +136,6 @@ for company in companies:
|
|
| 118 |
else:
|
| 119 |
df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative']
|
| 120 |
|
| 121 |
-
|
| 122 |
# Strategy C: FinBERT
|
| 123 |
df_c['StrategyC_Cumulative'] = 0.0
|
| 124 |
for i in range(1, len(df_c)):
|
|
@@ -132,18 +149,20 @@ for company in companies:
|
|
| 132 |
else:
|
| 133 |
df_c.loc[i,'StrategyC_Cumulative'] = df_c.loc[i-1,'StrategyC_Cumulative']
|
| 134 |
|
| 135 |
-
|
| 136 |
dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
|
| 137 |
|
| 138 |
# --- GRADIO CALLBACK FUNCTION ---
|
| 139 |
def show_company_data(selected_companies, aggregation="Day"):
|
| 140 |
if not selected_companies:
|
| 141 |
-
return
|
| 142 |
|
| 143 |
agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")
|
| 144 |
|
| 145 |
-
# ---
|
| 146 |
fig_strat = go.Figure()
|
|
|
|
|
|
|
|
|
|
| 147 |
for c in selected_companies:
|
| 148 |
if c not in dfs_final:
|
| 149 |
continue
|
|
@@ -152,15 +171,18 @@ def show_company_data(selected_companies, aggregation="Day"):
|
|
| 152 |
df_grouped = df_c.groupby(agg_col).agg({
|
| 153 |
'StrategyA_Cumulative': 'last',
|
| 154 |
'StrategyB_Cumulative': 'last',
|
| 155 |
-
'StrategyC_Cumulative': 'last'
|
|
|
|
| 156 |
}).reset_index()
|
| 157 |
|
|
|
|
|
|
|
|
|
|
| 158 |
strategy_labels = {
|
| 159 |
'StrategyA_Cumulative': "Custom Sentiment",
|
| 160 |
'StrategyB_Cumulative': "Regression",
|
| 161 |
'StrategyC_Cumulative': "FinBERT"
|
| 162 |
}
|
| 163 |
-
|
| 164 |
for strat in ['StrategyA_Cumulative', 'StrategyB_Cumulative', 'StrategyC_Cumulative']:
|
| 165 |
fig_strat.add_trace(go.Scatter(
|
| 166 |
x=df_grouped[agg_col],
|
|
@@ -169,6 +191,13 @@ def show_company_data(selected_companies, aggregation="Day"):
|
|
| 169 |
name=f"{c} - {strategy_labels[strat]}"
|
| 170 |
))
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
fig_strat.update_layout(
|
| 173 |
title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
|
| 174 |
xaxis_title=aggregation,
|
|
@@ -177,35 +206,18 @@ def show_company_data(selected_companies, aggregation="Day"):
|
|
| 177 |
hovermode="x unified"
|
| 178 |
)
|
| 179 |
|
| 180 |
-
|
| 181 |
-
# --- Price chart ---
|
| 182 |
-
fig_price = go.Figure()
|
| 183 |
-
for c in selected_companies:
|
| 184 |
-
if c not in dfs_final:
|
| 185 |
-
continue
|
| 186 |
-
df_c = dfs_final[c]
|
| 187 |
-
|
| 188 |
-
df_grouped = df_c.groupby(agg_col).agg({
|
| 189 |
-
f'Close_{TICKERS[c]}':'last'
|
| 190 |
-
}).reset_index()
|
| 191 |
-
|
| 192 |
-
fig_price.add_trace(go.Scatter(
|
| 193 |
-
x=df_grouped[agg_col], y=df_grouped[f'Close_{TICKERS[c]}'],
|
| 194 |
-
mode="lines", name=f"{c} Price"
|
| 195 |
-
))
|
| 196 |
-
|
| 197 |
fig_price.update_layout(
|
| 198 |
-
title="
|
| 199 |
xaxis_title=aggregation,
|
| 200 |
-
yaxis_title="
|
| 201 |
template="plotly_dark",
|
| 202 |
hovermode="x unified"
|
| 203 |
)
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
|
| 208 |
-
# ---
|
| 209 |
description_text = """
|
| 210 |
### Portfolio Strategy Comparison Dashboard
|
| 211 |
This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
|
|
@@ -214,41 +226,14 @@ This dashboard allows you to compare the performance of three sentiment models i
|
|
| 214 |
- If the score is below -0.8 → sell
|
| 215 |
- Otherwise → no trade
|
| 216 |
- For the regression model, thresholds are +1 and -1.
|
| 217 |
-
- **Dataset and preprocessing**:
|
| 218 |
-
- Closing prices and daily percent changes are calculated for each company.
|
| 219 |
-
- News articles mentioning Microsoft or Tesla are merged with the price data.
|
| 220 |
-
- Negative/Down scores are multiplied by -1, Neutral scores set to 0.
|
| 221 |
-
- Daily strategy value = daily percent change × stock price.
|
| 222 |
-
- Cumulative value = sum of daily strategy values over time.
|
| 223 |
-
- **Model comparison**:
|
| 224 |
-
- Regression is fine-tuned separately for Tesla and Microsoft.
|
| 225 |
-
- FinBERT is used as a baseline.
|
| 226 |
-
- The custom model incorporates actual stock movements and company-specific signals.
|
| 227 |
-
- **Results overview**:
|
| 228 |
-
- Tesla: Regression often performs better, though some losses occur.
|
| 229 |
-
- Microsoft: Regression closely follows market trends; FinBERT is less accurate.
|
| 230 |
-
- Regression aligns better with real stock movements by interpreting news contextually.
|
| 231 |
-
- **Caveats**:
|
| 232 |
-
- Multiple news per day may generate buy/sell signals that cancel each other.
|
| 233 |
-
- Strategy uses next-day price changes; no multi-day logic is applied.
|
| 234 |
-
- Simplified testing, but useful to compare model behavior.
|
| 235 |
"""
|
| 236 |
|
| 237 |
-
# --- INPUT OPTIONS ---
|
| 238 |
companies = ["Microsoft", "Tesla, Inc."]
|
| 239 |
|
| 240 |
-
# --- EXECUTION FUNCTION ---
|
| 241 |
-
def show_strategy(selected_companies, aggregation):
|
| 242 |
-
# Qui chiami la funzione del tuo modello per generare i grafici e il dataframe
|
| 243 |
-
# Ad esempio: return df, fig_strategies, fig_prices
|
| 244 |
-
return None, None, None # sostituire con output reali
|
| 245 |
-
|
| 246 |
-
# --- GRADIO INTERFACE ---
|
| 247 |
with gr.Blocks() as demo:
|
| 248 |
gr.Markdown("# Portfolio Strategy Dashboard")
|
| 249 |
gr.Markdown(description_text)
|
| 250 |
|
| 251 |
-
# Centered inputs
|
| 252 |
with gr.Row():
|
| 253 |
dropdown_companies = gr.Dropdown(
|
| 254 |
choices=companies,
|
|
@@ -261,16 +246,14 @@ with gr.Blocks() as demo:
|
|
| 261 |
value="Day",
|
| 262 |
label="Aggregation Level"
|
| 263 |
)
|
| 264 |
-
submit_btn = gr.Button("Submit")
|
| 265 |
|
| 266 |
-
# Output
|
| 267 |
data_table = gr.Dataframe(label="Data Preview", type="pandas")
|
| 268 |
strategies_plot = gr.Plot(label="Strategies")
|
| 269 |
prices_plot = gr.Plot(label="Prices")
|
| 270 |
|
| 271 |
-
# Wire the button to the outputs using the real function
|
| 272 |
submit_btn.click(
|
| 273 |
-
fn=show_company_data,
|
| 274 |
inputs=[dropdown_companies, radio_aggregation],
|
| 275 |
outputs=[data_table, strategies_plot, prices_plot]
|
| 276 |
)
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import yfinance as yf
|
| 3 |
+
from datasets import Dataset, load_dataset
|
| 4 |
from transformers import pipeline
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
import gradio as gr
|
| 7 |
|
|
|
|
| 8 |
HF_DATASET = "SelmaNajih001/FT_MultiCompany"
|
| 9 |
+
HF_PRIVATE_DATASET = "tuo-username/portfolio_strategy_data"
|
| 10 |
MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
|
| 11 |
MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
|
| 12 |
MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
|
|
|
|
| 18 |
}
|
| 19 |
companies = list(TICKERS.keys())
|
| 20 |
|
|
|
|
| 21 |
sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
|
| 22 |
price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
|
| 23 |
price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
|
| 24 |
finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)
|
| 25 |
|
|
|
|
| 26 |
df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
|
| 27 |
df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
|
| 28 |
df_multi['date_merge'] = df_multi['date'].dt.normalize()
|
| 29 |
df_multi.sort_values('date', inplace=True)
|
| 30 |
|
| 31 |
+
# Synchronise the freshly loaded rows with the copy already stored on the Hub.
# Load the previously pushed rows; fall back to an empty frame when the
# dataset cannot be loaded.
try:
    ds_existing = load_dataset(HF_PRIVATE_DATASET)["train"]
    df_existing = pd.DataFrame(ds_existing)
except Exception:  # best-effort load; KeyboardInterrupt/SystemExit still propagate
    df_existing = pd.DataFrame()

if df_existing.empty:
    # Nothing stored yet: every row of df_multi is new.
    df_to_add = df_multi.copy()
    df_updated = df_to_add.copy()
else:
    # Keep only rows whose 'Date' is not already stored, then append them.
    # NOTE(review): the match is on 'Date' alone, so a row for another company
    # that shares an already-stored date is skipped -- confirm this is intended.
    df_to_add = df_multi[~df_multi['Date'].isin(df_existing['Date'])]
    df_updated = pd.concat([df_existing, df_to_add], ignore_index=True)

# Publish the merged data back to the private Hub repository.
hf_dataset_updated = Dataset.from_pandas(df_updated)
hf_dataset_updated.push_to_hub(HF_PRIVATE_DATASET, private=True)
print(f"Dataset aggiornato su Hugging Face: {HF_PRIVATE_DATASET}")

# The rest of the script works on the merged data.
df_multi = df_updated.copy()
|
| 53 |
df_multi['Sentiment'] = ""
|
| 54 |
df_multi['Confidence'] = 0.0
|
| 55 |
df_multi['Predicted'] = 0.0
|
|
|
|
| 88 |
except:
|
| 89 |
df_multi.at[i,'Predicted'] = 0.0
|
| 90 |
|
| 91 |
+
|
| 92 |
prices = {}
|
| 93 |
for company, ticker in TICKERS.items():
|
| 94 |
start_date = df_multi[df_multi['Company']==company]['date'].min()
|
|
|
|
| 99 |
df_prices['PctChangeDaily'] = df_prices[f'Close_{ticker}'].pct_change().shift(-1)
|
| 100 |
prices[company] = df_prices
|
| 101 |
|
|
|
|
| 102 |
dfs_final = {}
|
| 103 |
for company in companies:
|
| 104 |
df_c = df_multi[df_multi['Company'] == company].copy()
|
|
|
|
| 136 |
else:
|
| 137 |
df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative']
|
| 138 |
|
|
|
|
| 139 |
# Strategy C: FinBERT
|
| 140 |
df_c['StrategyC_Cumulative'] = 0.0
|
| 141 |
for i in range(1, len(df_c)):
|
|
|
|
| 149 |
else:
|
| 150 |
df_c.loc[i,'StrategyC_Cumulative'] = df_c.loc[i-1,'StrategyC_Cumulative']
|
| 151 |
|
|
|
|
| 152 |
dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
|
| 153 |
|
| 154 |
# --- GRADIO CALLBACK FUNCTION ---
|
| 155 |
def show_company_data(selected_companies, aggregation="Day"):
|
| 156 |
if not selected_companies:
|
| 157 |
+
return pd.DataFrame(), None, None
|
| 158 |
|
| 159 |
agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")
|
| 160 |
|
| 161 |
+
# --- Figures ---
|
| 162 |
fig_strat = go.Figure()
|
| 163 |
+
fig_price = go.Figure()
|
| 164 |
+
dfs_display = []
|
| 165 |
+
|
| 166 |
for c in selected_companies:
|
| 167 |
if c not in dfs_final:
|
| 168 |
continue
|
|
|
|
| 171 |
df_grouped = df_c.groupby(agg_col).agg({
|
| 172 |
'StrategyA_Cumulative': 'last',
|
| 173 |
'StrategyB_Cumulative': 'last',
|
| 174 |
+
'StrategyC_Cumulative': 'last',
|
| 175 |
+
f'Close_{TICKERS[c]}': 'last'
|
| 176 |
}).reset_index()
|
| 177 |
|
| 178 |
+
df_grouped['Company'] = c
|
| 179 |
+
dfs_display.append(df_grouped)
|
| 180 |
+
|
| 181 |
strategy_labels = {
|
| 182 |
'StrategyA_Cumulative': "Custom Sentiment",
|
| 183 |
'StrategyB_Cumulative': "Regression",
|
| 184 |
'StrategyC_Cumulative': "FinBERT"
|
| 185 |
}
|
|
|
|
| 186 |
for strat in ['StrategyA_Cumulative', 'StrategyB_Cumulative', 'StrategyC_Cumulative']:
|
| 187 |
fig_strat.add_trace(go.Scatter(
|
| 188 |
x=df_grouped[agg_col],
|
|
|
|
| 191 |
name=f"{c} - {strategy_labels[strat]}"
|
| 192 |
))
|
| 193 |
|
| 194 |
+
fig_price.add_trace(go.Scatter(
|
| 195 |
+
x=df_grouped[agg_col],
|
| 196 |
+
y=df_grouped[f'Close_{TICKERS[c]}'],
|
| 197 |
+
mode="lines",
|
| 198 |
+
name=f"{c} Price"
|
| 199 |
+
))
|
| 200 |
+
|
| 201 |
fig_strat.update_layout(
|
| 202 |
title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
|
| 203 |
xaxis_title=aggregation,
|
|
|
|
| 206 |
hovermode="x unified"
|
| 207 |
)
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
fig_price.update_layout(
|
| 210 |
+
title="Stock Prices",
|
| 211 |
xaxis_title=aggregation,
|
| 212 |
+
yaxis_title="Price",
|
| 213 |
template="plotly_dark",
|
| 214 |
hovermode="x unified"
|
| 215 |
)
|
| 216 |
|
| 217 |
+
df_display = pd.concat(dfs_display, ignore_index=True) if dfs_display else pd.DataFrame()
|
| 218 |
+
return df_display, fig_strat, fig_price
|
| 219 |
|
| 220 |
+
# --- GRADIO INTERFACE ---
|
| 221 |
description_text = """
|
| 222 |
### Portfolio Strategy Comparison Dashboard
|
| 223 |
This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
|
|
|
|
| 226 |
- If the score is below -0.8 → sell
|
| 227 |
- Otherwise → no trade
|
| 228 |
- For the regression model, thresholds are +1 and -1.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
"""
|
| 230 |
|
|
|
|
| 231 |
companies = ["Microsoft", "Tesla, Inc."]
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
with gr.Blocks() as demo:
|
| 234 |
gr.Markdown("# Portfolio Strategy Dashboard")
|
| 235 |
gr.Markdown(description_text)
|
| 236 |
|
|
|
|
| 237 |
with gr.Row():
|
| 238 |
dropdown_companies = gr.Dropdown(
|
| 239 |
choices=companies,
|
|
|
|
| 246 |
value="Day",
|
| 247 |
label="Aggregation Level"
|
| 248 |
)
|
| 249 |
+
submit_btn = gr.Button("Submit")
|
| 250 |
|
|
|
|
| 251 |
data_table = gr.Dataframe(label="Data Preview", type="pandas")
|
| 252 |
strategies_plot = gr.Plot(label="Strategies")
|
| 253 |
prices_plot = gr.Plot(label="Prices")
|
| 254 |
|
|
|
|
| 255 |
submit_btn.click(
|
| 256 |
+
fn=show_company_data,
|
| 257 |
inputs=[dropdown_companies, radio_aggregation],
|
| 258 |
outputs=[data_table, strategies_plot, prices_plot]
|
| 259 |
)
|