SelmaNajih001 committed on
Commit
46a0645
·
verified ·
1 Parent(s): e55274c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -66
app.py CHANGED
@@ -1,12 +1,12 @@
1
  import pandas as pd
2
  import yfinance as yf
3
- from datasets import load_dataset
4
  from transformers import pipeline
5
  import plotly.graph_objects as go
6
  import gradio as gr
7
 
8
- # --- PARAMETRI ---
9
  HF_DATASET = "SelmaNajih001/FT_MultiCompany"
 
10
  MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
11
  MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
12
  MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
@@ -18,19 +18,38 @@ TICKERS = {
18
  }
19
  companies = list(TICKERS.keys())
20
 
21
- # --- PIPELINES ---
22
  sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
23
  price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
24
  price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
25
  finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)
26
 
27
- # --- LOAD DATASET ---
28
  df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
29
  df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
30
  df_multi['date_merge'] = df_multi['date'].dt.normalize()
31
  df_multi.sort_values('date', inplace=True)
32
 
33
- # --- SENTIMENT & PREDICTION ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  df_multi['Sentiment'] = ""
35
  df_multi['Confidence'] = 0.0
36
  df_multi['Predicted'] = 0.0
@@ -69,7 +88,7 @@ for i, row in df_multi.iterrows():
69
  except:
70
  df_multi.at[i,'Predicted'] = 0.0
71
 
72
- # --- FETCH STOCK PRICES ---
73
  prices = {}
74
  for company, ticker in TICKERS.items():
75
  start_date = df_multi[df_multi['Company']==company]['date'].min()
@@ -80,7 +99,6 @@ for company, ticker in TICKERS.items():
80
  df_prices['PctChangeDaily'] = df_prices[f'Close_{ticker}'].pct_change().shift(-1)
81
  prices[company] = df_prices
82
 
83
- # --- MERGE & STRATEGIE ---
84
  dfs_final = {}
85
  for company in companies:
86
  df_c = df_multi[df_multi['Company'] == company].copy()
@@ -118,7 +136,6 @@ for company in companies:
118
  else:
119
  df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative']
120
 
121
-
122
  # Strategy C: FinBERT
123
  df_c['StrategyC_Cumulative'] = 0.0
124
  for i in range(1, len(df_c)):
@@ -132,18 +149,20 @@ for company in companies:
132
  else:
133
  df_c.loc[i,'StrategyC_Cumulative'] = df_c.loc[i-1,'StrategyC_Cumulative']
134
 
135
-
136
  dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
137
 
138
  # --- GRADIO FUNCTION ---
139
  def show_company_data(selected_companies, aggregation="Day"):
140
  if not selected_companies:
141
- return "Please select at least one company", None, None
142
 
143
  agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")
144
 
145
- # --- Strategies Chart ---
146
  fig_strat = go.Figure()
 
 
 
147
  for c in selected_companies:
148
  if c not in dfs_final:
149
  continue
@@ -152,15 +171,18 @@ def show_company_data(selected_companies, aggregation="Day"):
152
  df_grouped = df_c.groupby(agg_col).agg({
153
  'StrategyA_Cumulative': 'last',
154
  'StrategyB_Cumulative': 'last',
155
- 'StrategyC_Cumulative': 'last'
 
156
  }).reset_index()
157
 
 
 
 
158
  strategy_labels = {
159
  'StrategyA_Cumulative': "Custom Sentiment",
160
  'StrategyB_Cumulative': "Regression",
161
  'StrategyC_Cumulative': "FinBERT"
162
  }
163
-
164
  for strat in ['StrategyA_Cumulative', 'StrategyB_Cumulative', 'StrategyC_Cumulative']:
165
  fig_strat.add_trace(go.Scatter(
166
  x=df_grouped[agg_col],
@@ -169,6 +191,13 @@ def show_company_data(selected_companies, aggregation="Day"):
169
  name=f"{c} - {strategy_labels[strat]}"
170
  ))
171
 
 
 
 
 
 
 
 
172
  fig_strat.update_layout(
173
  title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
174
  xaxis_title=aggregation,
@@ -177,35 +206,18 @@ def show_company_data(selected_companies, aggregation="Day"):
177
  hovermode="x unified"
178
  )
179
 
180
-
181
- # --- Grafico Prezzi ---
182
- fig_price = go.Figure()
183
- for c in selected_companies:
184
- if c not in dfs_final:
185
- continue
186
- df_c = dfs_final[c]
187
-
188
- df_grouped = df_c.groupby(agg_col).agg({
189
- f'Close_{TICKERS[c]}':'last'
190
- }).reset_index()
191
-
192
- fig_price.add_trace(go.Scatter(
193
- x=df_grouped[agg_col], y=df_grouped[f'Close_{TICKERS[c]}'],
194
- mode="lines", name=f"{c} Price"
195
- ))
196
-
197
  fig_price.update_layout(
198
- title="Andamento Prezzi",
199
  xaxis_title=aggregation,
200
- yaxis_title="Stock Price",
201
  template="plotly_dark",
202
  hovermode="x unified"
203
  )
204
 
205
- return None, fig_strat, fig_price
206
- import gradio as gr
207
 
208
- # --- Markdown descrittivo ---
209
  description_text = """
210
  ### Portfolio Strategy Comparison Dashboard
211
  This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
@@ -214,41 +226,14 @@ This dashboard allows you to compare the performance of three sentiment models i
214
  - If the score is below -0.8 → sell
215
  - Otherwise → no trade
216
  - For the regression model, thresholds are +1 and -1.
217
- - **Dataset and preprocessing**:
218
- - Closing prices and daily percent changes are calculated for each company.
219
- - News articles mentioning Microsoft or Tesla are merged with the price data.
220
- - Negative/Down scores are multiplied by -1, Neutral scores set to 0.
221
- - Daily strategy value = daily percent change × stock price.
222
- - Cumulative value = sum of daily strategy values over time.
223
- - **Model comparison**:
224
- - Regression is fine-tuned separately for Tesla and Microsoft.
225
- - FinBERT is used as a baseline.
226
- - The custom model incorporates actual stock movements and company-specific signals.
227
- - **Results overview**:
228
- - Tesla: Regression often performs better, though some losses occur.
229
- - Microsoft: Regression closely follows market trends; FinBERT is less accurate.
230
- - Regression aligns better with real stock movements by interpreting news contextually.
231
- - **Caveats**:
232
- - Multiple news per day may generate buy/sell signals that cancel each other.
233
- - Strategy uses next-day price changes; no multi-day logic is applied.
234
- - Simplified testing, but useful to compare model behavior.
235
  """
236
 
237
- # --- INPUT OPTIONS ---
238
  companies = ["Microsoft", "Tesla, Inc."]
239
 
240
- # --- FUNZIONE DI ESECUZIONE ---
241
- def show_strategy(selected_companies, aggregation):
242
- # Qui chiami la funzione del tuo modello per generare i grafici e il dataframe
243
- # Ad esempio: return df, fig_strategies, fig_prices
244
- return None, None, None # sostituire con output reali
245
-
246
- # --- INTERFACCIA GRADIO ---
247
  with gr.Blocks() as demo:
248
  gr.Markdown("# Portfolio Strategy Dashboard")
249
  gr.Markdown(description_text)
250
 
251
- # Input centrati
252
  with gr.Row():
253
  dropdown_companies = gr.Dropdown(
254
  choices=companies,
@@ -261,16 +246,14 @@ with gr.Blocks() as demo:
261
  value="Day",
262
  label="Aggregation Level"
263
  )
264
- submit_btn = gr.Button("Submit") # bottone per inviare
265
 
266
- # Output
267
  data_table = gr.Dataframe(label="Data Preview", type="pandas")
268
  strategies_plot = gr.Plot(label="Strategies")
269
  prices_plot = gr.Plot(label="Prices")
270
 
271
- # Collega bottone agli output usando la funzione reale
272
  submit_btn.click(
273
- fn=show_company_data, # <--- qui prima avevi show_strategy
274
  inputs=[dropdown_companies, radio_aggregation],
275
  outputs=[data_table, strategies_plot, prices_plot]
276
  )
 
1
  import pandas as pd
2
  import yfinance as yf
3
+ from datasets import Dataset, load_dataset
4
  from transformers import pipeline
5
  import plotly.graph_objects as go
6
  import gradio as gr
7
 
 
8
  HF_DATASET = "SelmaNajih001/FT_MultiCompany"
9
+ HF_PRIVATE_DATASET = "tuo-username/portfolio_strategy_data"
10
  MODEL_SENTIMENT = "SelmaNajih001/SentimentBasedOnPriceVariation"
11
  MODEL_PRICE_TESLA = "SelmaNajih001/PricePredictionForTesla"
12
  MODEL_PRICE_MICROSOFT = "SelmaNajih001/PricePredictionForMicrosoft"
 
18
  }
19
  companies = list(TICKERS.keys())
20
 
 
21
  sentiment_pipeline = pipeline("sentiment-analysis", model=MODEL_SENTIMENT)
22
  price_pipeline_tesla = pipeline("text-classification", model=MODEL_PRICE_TESLA)
23
  price_pipeline_msft = pipeline("text-classification", model=MODEL_PRICE_MICROSOFT)
24
  finbert_pipeline = pipeline("sentiment-analysis", model=MODEL_FINBERT)
25
 
 
26
  df_multi = pd.DataFrame(load_dataset(HF_DATASET)["train"])
27
  df_multi['date'] = pd.to_datetime(df_multi['Date'], errors='coerce')
28
  df_multi['date_merge'] = df_multi['date'].dt.normalize()
29
  df_multi.sort_values('date', inplace=True)
30
 
# --- Sync with the private Hub dataset ---
# Load the table stored under HF_PRIVATE_DATASET, append the rows of df_multi
# whose 'Date' is not stored yet, push the merged table back, and continue
# the rest of the script on the merged table.
# NOTE(review): HF_PRIVATE_DATASET looks like a placeholder namespace --
# confirm it names a repo the configured token is allowed to write to.
try:
    ds_existing = load_dataset(HF_PRIVATE_DATASET)["train"]
    df_existing = pd.DataFrame(ds_existing)
except Exception as exc:
    # Best-effort load: any failure falls back to an empty frame so that the
    # full df_multi is uploaded below. `Exception` (instead of a bare
    # `except:`) lets KeyboardInterrupt/SystemExit propagate, and the reason
    # is reported instead of being silently discarded.
    print(f"Could not load {HF_PRIVATE_DATASET} ({exc!r}); starting from an empty dataset")
    df_existing = pd.DataFrame()

if df_existing.empty:
    # Nothing stored yet: every row of df_multi is new.
    df_to_add = df_multi.copy()
    df_updated = df_to_add.copy()
else:
    # NOTE(review): rows are matched on 'Date' alone, so a new row that
    # shares its date with any stored row is skipped -- confirm this is the
    # intended de-duplication key.
    df_to_add = df_multi[~df_multi['Date'].isin(df_existing['Date'])]
    df_updated = pd.concat([df_existing, df_to_add], ignore_index=True)

hf_dataset_updated = Dataset.from_pandas(df_updated)
hf_dataset_updated.push_to_hub(HF_PRIVATE_DATASET, private=True)
print(f"Dataset aggiornato su Hugging Face: {HF_PRIVATE_DATASET}")

# Everything downstream works on the merged table.
df_multi = df_updated.copy()
53
  df_multi['Sentiment'] = ""
54
  df_multi['Confidence'] = 0.0
55
  df_multi['Predicted'] = 0.0
 
88
  except:
89
  df_multi.at[i,'Predicted'] = 0.0
90
 
91
+
92
  prices = {}
93
  for company, ticker in TICKERS.items():
94
  start_date = df_multi[df_multi['Company']==company]['date'].min()
 
99
  df_prices['PctChangeDaily'] = df_prices[f'Close_{ticker}'].pct_change().shift(-1)
100
  prices[company] = df_prices
101
 
 
102
  dfs_final = {}
103
  for company in companies:
104
  df_c = df_multi[df_multi['Company'] == company].copy()
 
136
  else:
137
  df_c.loc[i,'StrategyB_Cumulative'] = df_c.loc[i-1,'StrategyB_Cumulative']
138
 
 
139
  # Strategy C: FinBERT
140
  df_c['StrategyC_Cumulative'] = 0.0
141
  for i in range(1, len(df_c)):
 
149
  else:
150
  df_c.loc[i,'StrategyC_Cumulative'] = df_c.loc[i-1,'StrategyC_Cumulative']
151
 
 
152
  dfs_final[company] = df_c.drop(columns=["date", "date_merge"], errors="ignore")
153
 
154
  # --- GRADIO FUNCTION ---
155
  def show_company_data(selected_companies, aggregation="Day"):
156
  if not selected_companies:
157
+ return pd.DataFrame(), None, None
158
 
159
  agg_col = {"Day": "Day", "Month": "Month", "Year": "Year"}.get(aggregation, "Day")
160
 
161
+ # --- Figures ---
162
  fig_strat = go.Figure()
163
+ fig_price = go.Figure()
164
+ dfs_display = []
165
+
166
  for c in selected_companies:
167
  if c not in dfs_final:
168
  continue
 
171
  df_grouped = df_c.groupby(agg_col).agg({
172
  'StrategyA_Cumulative': 'last',
173
  'StrategyB_Cumulative': 'last',
174
+ 'StrategyC_Cumulative': 'last',
175
+ f'Close_{TICKERS[c]}': 'last'
176
  }).reset_index()
177
 
178
+ df_grouped['Company'] = c
179
+ dfs_display.append(df_grouped)
180
+
181
  strategy_labels = {
182
  'StrategyA_Cumulative': "Custom Sentiment",
183
  'StrategyB_Cumulative': "Regression",
184
  'StrategyC_Cumulative': "FinBERT"
185
  }
 
186
  for strat in ['StrategyA_Cumulative', 'StrategyB_Cumulative', 'StrategyC_Cumulative']:
187
  fig_strat.add_trace(go.Scatter(
188
  x=df_grouped[agg_col],
 
191
  name=f"{c} - {strategy_labels[strat]}"
192
  ))
193
 
194
+ fig_price.add_trace(go.Scatter(
195
+ x=df_grouped[agg_col],
196
+ y=df_grouped[f'Close_{TICKERS[c]}'],
197
+ mode="lines",
198
+ name=f"{c} Price"
199
+ ))
200
+
201
  fig_strat.update_layout(
202
  title="Strategies Comparison (Custom Sentiment, Regression, FinBERT)",
203
  xaxis_title=aggregation,
 
206
  hovermode="x unified"
207
  )
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  fig_price.update_layout(
210
+ title="Stock Prices",
211
  xaxis_title=aggregation,
212
+ yaxis_title="Price",
213
  template="plotly_dark",
214
  hovermode="x unified"
215
  )
216
 
217
+ df_display = pd.concat(dfs_display, ignore_index=True) if dfs_display else pd.DataFrame()
218
+ return df_display, fig_strat, fig_price
219
 
220
+ # --- GRADIO INTERFACE ---
221
  description_text = """
222
  ### Portfolio Strategy Comparison Dashboard
223
  This dashboard allows you to compare the performance of three sentiment models in driving trading strategies for Microsoft and Tesla.
 
226
  - If the score is below -0.8 → sell
227
  - Otherwise → no trade
228
  - For the regression model, thresholds are +1 and -1.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  """
230
 
 
231
  companies = ["Microsoft", "Tesla, Inc."]
232
 
 
 
 
 
 
 
 
233
  with gr.Blocks() as demo:
234
  gr.Markdown("# Portfolio Strategy Dashboard")
235
  gr.Markdown(description_text)
236
 
 
237
  with gr.Row():
238
  dropdown_companies = gr.Dropdown(
239
  choices=companies,
 
246
  value="Day",
247
  label="Aggregation Level"
248
  )
249
+ submit_btn = gr.Button("Submit")
250
 
 
251
  data_table = gr.Dataframe(label="Data Preview", type="pandas")
252
  strategies_plot = gr.Plot(label="Strategies")
253
  prices_plot = gr.Plot(label="Prices")
254
 
 
255
  submit_btn.click(
256
+ fn=show_company_data,
257
  inputs=[dropdown_companies, radio_aggregation],
258
  outputs=[data_table, strategies_plot, prices_plot]
259
  )