# Captured from a Hugging Face Spaces page (Space status: Sleeping).
| from transformers import pipeline | |
| from tqdm import tqdm | |
| import pandas as pd | |
| import streamlit as st | |
| from io import StringIO | |
def correct_text(uploaded_file, column_to_correct):
    """
    Correct the text of one column with a text-correction model.

    Each non-null cell of the selected column is converted to ``str``,
    prefixed with ``"<es>"`` and passed through the
    ``sdadas/byt5-text-correction`` text2text-generation pipeline. A
    Streamlit progress bar and status line are updated after every row.

    Args:
        uploaded_file: DataFrame containing the text to correct. It is
            modified in place and also returned.
        column_to_correct: Positional index of the column to correct.

    Returns:
        The same DataFrame. Rows whose corrected text differs from the
        original have the corrected text stored under the column label
        ``column_to_correct + 1``; other rows are left untouched in that
        column.
    """
    corrector = pipeline("text2text-generation",
                         model="sdadas/byt5-text-correction")
    df = uploaded_file

    # Loop invariants: row count for the progress fraction and the label
    # under which corrections are written.
    total_rows = len(df)
    target_label = column_to_correct + 1

    progress_bar = st.progress(0)
    status_text = st.text("Correcting text π§ ...")

    # Count rows positionally. The index label is not guaranteed to be a
    # 0..n-1 integer, so deriving progress from it can raise TypeError or
    # produce a fraction above 1.0, which st.progress rejects.
    for position, (index, row) in enumerate(df.iterrows(), start=1):
        cell = row.iloc[column_to_correct]
        if pd.notna(cell):
            original_text = str(cell)
            corrected_text = corrector(
                "<es>" + original_text, max_length=1024)[0]['generated_text']
            # Save corrected text only if different from original
            if corrected_text != original_text:
                df.loc[index, target_label] = corrected_text

        # Advance the bar for every row, including rows with empty cells,
        # so it always finishes at 100%.
        progress = position / total_rows
        progress_bar.progress(progress)
        status_text.text(f"Progress: {int(progress * 100)}% completed ")
    return df
def choose_columns(dataframe):
    """
    Let the user pick the column to correct and preview its data.

    Renders a select box listing ``dataframe.columns``, shows the first
    non-null values of the chosen column, and renders a "Correct Text"
    button.

    Args:
        dataframe: Input DataFrame.

    Returns:
        The location of the selected column as returned by
        ``dataframe.columns.get_loc`` when the button is pressed with a
        column selected; ``None`` otherwise (button not pressed, or no
        column selected, in which case an error is shown).
    """
    st.write("Choose the columns to correct π")
    column_to_correct = st.selectbox(
        "Select columns to correct", dataframe.columns)

    # Use one explicit None check everywhere: a plain truthiness test would
    # treat valid but falsy labels (e.g. an integer column 0 or "") as
    # "nothing selected".
    has_selection = column_to_correct is not None

    if has_selection:
        st.write("Preview of data in selected columns π:")
        non_empty_data = dataframe[dataframe[column_to_correct].notna()]
        st.dataframe(non_empty_data[column_to_correct].head())

    if st.button("Correct Text"):
        if has_selection:
            return dataframe.columns.get_loc(column_to_correct)
        st.error("Please select a column before correcting text β")
    return None
def main():
    """Run the app: upload a CSV, choose a column, display the corrected table."""
    st.title("CSV text Correction App β")
    csv_upload = st.file_uploader("Choose a CSV file π", type=["csv"])

    # Nothing to do until the user has provided a file.
    if csv_upload is None:
        return

    try:
        table = pd.read_csv(csv_upload, encoding='utf-8')
        selected_position = choose_columns(table)
        if selected_position is not None:
            corrected_table = correct_text(table, selected_position)
            st.write(corrected_table)
    except UnicodeDecodeError:
        # The upload is not valid UTF-8.
        st.error(
            "Error: Unable to decode the file. Please check the file encoding or try another file.")
    except Exception as e:
        # Top-level UI boundary: report any other failure to the user.
        st.error(f"An unexpected error occurred: {e}")
# Script entry point: start the app only when this file is run directly.
if __name__ == "__main__":
    main()