Spaces: Runtime error
```python
import streamlit as st
import streamlit.components.v1 as stc
import pandas as pd
import docx2txt

# NLP packages used for text analysis
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
# from nltk import ne_chunk
from nltk.tag import StanfordNERTagger
from collections import Counter
from textblob import TextBlob
import neattext as nt  # provides TextFrame, used for the word statistics below

import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud
import base64
import time

from app_utils import *


def load_image(file):
    img = Image.open(file)
    return img


HTML_BANNER = """
<div style="background-color:green;padding:10px;border-radius:10px">
<h1 style="color:white;text-align:center;">Text Analysis App </h1>
</div>
"""


def text_analysis():
    stc.html(HTML_BANNER)
    st.image(load_image('text_analysis.png'))
    menu = ['Text-analysis', 'Upload_Files']
    choice = st.sidebar.selectbox('Menu', menu)

    if choice == 'Text-analysis':
        st.subheader('Analyse Text')
        text = st.text_area("Enter the text to analyze")
        if st.button("Analyze"):
            st.success("Success")
            with st.expander('Original Text'):
                st.write(text)
            with st.expander('Text Analysis'):
                token_analysis = nlp_analysis(text)
                st.dataframe(token_analysis)
            with st.expander('Entities'):
                entity_result = find_entities(text)
                stc.html(entity_result, height=100, scrolling=True)

            col1, col2 = st.columns(2)
            with col1:
                with st.expander("Word Stats"):
                    st.info("Word Statistics")
                    docx = nt.TextFrame(text)
                    st.write(docx.word_stats())
                with st.expander("Top keywords"):
                    keywords = get_most_common_tokens(text)
                    st.write(keywords)
                with st.expander('Tagged Keywords'):
                    data = pos_tag(word_tokenize(text))
                    st.dataframe(data)
                    visualize_tags = tag_visualize(data)
                    stc.html(visualize_tags, scrolling=True)
                with st.expander("Sentiment"):
                    sent_result = get_semantics(text)
                    st.write(sent_result)

            with col2:
                with st.expander("Plot word freq"):
                    try:
                        fig, ax = plt.subplots()
                        most_common_tokens = dict(token_analysis["Token"].value_counts())
                        sns.countplot(data=token_analysis[token_analysis["Token"].isin(most_common_tokens)], x="Token", ax=ax)
                        ax.set_xlabel('Token')
                        ax.set_ylabel('Frequency')
                        ax.tick_params(axis='x', rotation=45)
                        st.pyplot(fig)
                    except Exception:
                        st.warning('Insufficient data')
                with st.expander("Plot part of speech"):
                    try:
                        fig, ax = plt.subplots()
                        most_common_tokens = dict(token_analysis["Position"].value_counts())
                        sns.countplot(data=token_analysis[token_analysis["Position"].isin(most_common_tokens)], x="Position", ax=ax)
                        ax.set_xlabel('PoS')
                        ax.set_ylabel('Frequency')
                        ax.tick_params(axis='x', rotation=45)
                        st.pyplot(fig)
                    except Exception:
                        st.warning('Insufficient data')
                with st.expander("Plot word cloud"):
                    try:
                        plot_wordcloud(text)
                    except Exception:
                        st.warning('Insufficient data')

            with st.expander('Download Results'):
                file_download(token_analysis)

    elif choice == 'Upload_Files':
        text_file = st.file_uploader('Upload Files', type=['docx', 'txt'])
        if text_file is not None:
            if text_file.type == 'text/plain':
                text = str(text_file.read(), "utf-8")
            else:
                text = docx2txt.process(text_file)

            if st.button("Analyze"):
                with st.expander('Original Text'):
                    st.write(text)
                with st.expander('Text Analysis'):
                    token_analysis = nlp_analysis(text)
                    st.dataframe(token_analysis)
                with st.expander('Entities'):
                    entity_result = find_entities(text)
                    stc.html(entity_result, height=100, scrolling=True)

                col1, col2 = st.columns(2)
                with col1:
                    with st.expander("Word Stats"):
                        st.info("Word Statistics")
                        docx = nt.TextFrame(text)
                        st.write(docx.word_stats())
                    with st.expander("Top keywords"):
                        keywords = get_most_common_tokens(text)
                        st.write(keywords)
                    with st.expander("Sentiment"):
                        sent_result = get_semantics(text)
                        st.write(sent_result)

                with col2:
                    with st.expander("Plot word freq"):
                        fig, ax = plt.subplots()
                        num_tokens = 10  # number of most frequent tokens to display
                        most_common_tokens = dict(token_analysis["Token"].value_counts().head(num_tokens))
                        sns.countplot(data=token_analysis[token_analysis["Token"].isin(most_common_tokens)], x="Token", ax=ax)
                        ax.set_xlabel('Token')
                        ax.set_ylabel('Frequency')
                        ax.tick_params(axis='x', rotation=45)
                        st.pyplot(fig)
                    with st.expander("Plot part of speech"):
                        fig, ax = plt.subplots()
                        most_common_tokens = dict(token_analysis["Position"].value_counts())
                        sns.countplot(data=token_analysis[token_analysis["Position"].isin(most_common_tokens)], x="Position", ax=ax)
                        ax.set_xlabel('PoS')
                        ax.set_ylabel('Frequency')
                        ax.tick_params(axis='x', rotation=45)
                        st.pyplot(fig)
                    with st.expander("Plot word cloud"):
                        plot_wordcloud(text)

                with st.expander('Download Results'):
                    file_download(token_analysis)


# Run the page when this file is executed directly (e.g. `streamlit run`)
if __name__ == '__main__':
    text_analysis()
```
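The script calls `word_tokenize`, `pos_tag`, `WordNetLemmatizer`, and `stopwords`, all of which depend on NLTK corpora that are not bundled with the `nltk` package itself; a missing corpus is a common cause of a Spaces runtime error. A minimal sketch of the startup downloads, assuming the standard NLTK resource names:

```python
import nltk

# Standard NLTK resource identifiers for the tokenizer, PoS tagger,
# lemmatizer and stopword list used above; adjust if some are already cached.
for resource in ("punkt", "averaged_perceptron_tagger", "wordnet", "stopwords"):
    nltk.download(resource, quiet=True)
```

The `app_utils` module that the script star-imports is not part of this listing, so the helpers below are hypothetical reconstructions, assuming only the call signatures and the `Token`/`Position` DataFrame columns that the main script relies on; `find_entities`, `tag_visualize`, and the real implementations may look different.

```python
# Hypothetical app_utils helpers -- the real module is not shown in the Space.
import base64
import time
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
from textblob import TextBlob
from wordcloud import WordCloud


def nlp_analysis(text):
    """Tokenize and PoS-tag the text; column names match what the plots expect."""
    tagged = pos_tag(word_tokenize(text))
    return pd.DataFrame(tagged, columns=["Token", "Position"])


def get_most_common_tokens(text, num=10):
    """Return the `num` most frequent tokens as a {token: count} dict."""
    return dict(Counter(word_tokenize(text)).most_common(num))


def get_semantics(text):
    """Polarity/subjectivity via TextBlob, which the main script already imports."""
    return TextBlob(text).sentiment


def plot_wordcloud(text):
    """Render a word cloud for the text inside the current Streamlit container."""
    wc = WordCloud(width=600, height=400, background_color="white").generate(text)
    fig, ax = plt.subplots()
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)


def file_download(df):
    """Offer the analysis DataFrame as a downloadable CSV link."""
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    timestr = time.strftime("%Y%m%d-%H%M%S")
    href = f'<a href="data:file/csv;base64,{b64}" download="analysis_{timestr}.csv">Download CSV</a>'
    st.markdown(href, unsafe_allow_html=True)
```

With helpers shaped like these, the `sns.countplot` calls in the main script find the `Token` and `Position` columns they filter on, and the download expander gets a plain CSV link.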