Spaces:
Build error
Build error
Update variables.py
Browse files- variables.py +35 -1
variables.py
CHANGED
|
@@ -24,6 +24,40 @@ from langchain.schema import (
|
|
| 24 |
SystemMessage
|
| 25 |
)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
@st.experimental_singleton(suppress_st_warning=True)
|
| 28 |
def get_latest_file():
|
| 29 |
'''Get the latest file from output folder'''
|
|
@@ -44,7 +78,7 @@ def get_latest_file():
|
|
| 44 |
return file_contents
|
| 45 |
|
| 46 |
@st.experimental_singleton(suppress_st_warning=True)
|
| 47 |
-
def
|
| 48 |
'''Process file with latest tweets'''
|
| 49 |
|
| 50 |
# Split tweets into chunks
|
|
|
|
| 24 |
SystemMessage
|
| 25 |
)
|
| 26 |
|
| 27 |
+
@st.experimental_singleton(suppress_st_warning=True)
|
| 28 |
+
def load_models():
|
| 29 |
+
'''load sentiment and topic classification models'''
|
| 30 |
+
sent_pipe = pipeline(task,model=sent_model_id, tokenizer=sent_model_id)
|
| 31 |
+
topic_pipe = pipeline(task, model=topic_model_id, tokenizer=topic_model_id)
|
| 32 |
+
|
| 33 |
+
return sent_pipe, topic_pipe
|
| 34 |
+
|
| 35 |
+
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
|
| 36 |
+
def process_tweets(df,df_users):
|
| 37 |
+
'''process tweets into a dataframe'''
|
| 38 |
+
|
| 39 |
+
df['author'] = df['author'].astype(np.int64)
|
| 40 |
+
|
| 41 |
+
df_merged = df.merge(df_users, on='author')
|
| 42 |
+
|
| 43 |
+
tweet_list = df_merged['tweet'].tolist()
|
| 44 |
+
|
| 45 |
+
sentiment, topic = pd.DataFrame(sentiment_classifier(tweet_list)), pd.DataFrame(topic_classifier(tweet_list))
|
| 46 |
+
|
| 47 |
+
sentiment.rename(columns={'score':'sentiment_confidence','label':'sentiment'}, inplace=True)
|
| 48 |
+
|
| 49 |
+
topic.rename(columns={'score':'topic_confidence','label':'topic'}, inplace=True)
|
| 50 |
+
|
| 51 |
+
df_group = pd.concat([df_merged,sentiment,topic],axis=1)
|
| 52 |
+
|
| 53 |
+
df_group[['sentiment_confidence','topic_confidence']] = df_group[['sentiment_confidence','topic_confidence']].round(2).mul(100)
|
| 54 |
+
|
| 55 |
+
df_tweets = df_group[['creation_time','username','tweet','sentiment','topic','sentiment_confidence','topic_confidence']]
|
| 56 |
+
|
| 57 |
+
df_tweets = df_tweets.sort_values(by=['creation_time'],ascending=False)
|
| 58 |
+
|
| 59 |
+
return df_tweets
|
| 60 |
+
|
| 61 |
@st.experimental_singleton(suppress_st_warning=True)
|
| 62 |
def get_latest_file():
|
| 63 |
'''Get the latest file from output folder'''
|
|
|
|
| 78 |
return file_contents
|
| 79 |
|
| 80 |
@st.experimental_singleton(suppress_st_warning=True)
|
| 81 |
+
def embed_tweets(file,model,query):
|
| 82 |
'''Process file with latest tweets'''
|
| 83 |
|
| 84 |
# Split tweets into chunks
|