import os

import streamlit as st
import PyPDF2  # PyPDF2 for PDF text extraction
from langchain_community.llms import Replicate
from langchain_community.vectorstores import FAISS  # FAISS-backed vector store for similarity search
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

# Replicate API token (substitute your own; never commit a real token to source control)
os.environ['REPLICATE_API_TOKEN'] = "YOUR_REPLICATE_API_TOKEN"
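# Safer alternative (sketch, assuming the app is deployed with Streamlit secrets
# configured): read the token from st.secrets instead of hardcoding it.
# os.environ['REPLICATE_API_TOKEN'] = st.secrets["REPLICATE_API_TOKEN"]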
def extract_text_with_pypdf2(file):
    reader = PyPDF2.PdfReader(file)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for image-only pages; guard against it
        text += page.extract_text() or ""
    return text
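# Quick standalone sanity check (sketch; "sample.pdf" is a hypothetical local file,
# not part of the Streamlit flow):
# with open("sample.pdf", "rb") as f:
#     print(extract_text_with_pypdf2(f)[:200])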
# Streamlit interface
st.title("PDF Chatbot by *** Tasrif Nur Himel ***")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file is not None:
    # Extract text from the PDF using PyPDF2
    extracted_text = extract_text_with_pypdf2(uploaded_file)

    # Split the text into smaller, overlapping chunks for retrieval
    text_splitter = CharacterTextSplitter(
        separator=" ",  # split on spaces for finer-grained chunks
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(extracted_text)
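    # Optional debug (sketch): surface how the splitter chunked the document. With
    # chunk_size=800 and chunk_overlap=200, consecutive chunks share roughly 200
    # characters, so sentences cut at a boundary survive in the neighbouring chunk.
    # st.caption(f"Split into {len(texts)} chunks (~800 chars, 200-char overlap)")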
    # Use HuggingFace embeddings to turn each chunk into a numerical vector
    embeddings = HuggingFaceEmbeddings()

    # Build a FAISS vector store over the chunks for similarity search
    document_search = FAISS.from_texts(texts, embeddings)
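    # Optional persistence (sketch, assuming a writable "faiss_index" directory):
    # saving the index avoids re-embedding the same PDF on every rerun. Depending
    # on your langchain_community version, load_local may require the
    # allow_dangerous_deserialization flag shown here.
    # document_search.save_local("faiss_index")
    # document_search = FAISS.load_local("faiss_index", embeddings,
    #                                    allow_dangerous_deserialization=True)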
    # Initialize the Replicate-hosted Llama 2 (13B chat) model
    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        model_kwargs={"temperature": 0.75, "max_length": 3000},
    )
    # Minimal retrieval-plus-prompt chain: fetch similar chunks, then ask the LLM
    # to answer against them (a hand-rolled alternative to langchain's
    # ConversationalRetrievalChain)
    def qa_chain(query):
        # Retrieve the chunks most similar to the query
        docs = document_search.similarity_search(query)
        # Combine the query and retrieved chunks into a single prompt
        combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
        combined_prompt += "\n\n".join(doc.page_content for doc in docs)
        # Query the Llama model (invoke() is the current LangChain call)
        return llm.invoke(combined_prompt)
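    # Tuning note (sketch): similarity_search returns the top 4 chunks by default;
    # pass k explicitly to trade prompt length against recall, e.g.:
    # docs = document_search.similarity_search(query, k=6)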
    st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")

    # Initialize the chat history once per session
    if 'chat_history' not in st.session_state:
        st.session_state['chat_history'] = []

    query = st.text_input("Prompt:")
    if st.button("Send") and query:
        if query.lower() in ("exit", "quit", "q"):
            st.write('Exiting')
            st.stop()  # end this Streamlit run cleanly instead of calling sys.exit()
        result = qa_chain(query)
        st.write('Answer: ' + result)
        st.session_state['chat_history'].append((query, result))
    # Display the chat history
    st.write("### Chat History")
    for past_query, past_answer in st.session_state['chat_history']:
        st.write(f"**You:** {past_query}")
        st.write(f"**Bot:** {past_answer}")
else:
    st.write("Please upload a PDF file.")
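# To run locally (sketch; assumes the file is saved as app.py and that these
# distributions cover your environment):
#   pip install streamlit PyPDF2 faiss-cpu langchain langchain-community \
#       langchain-huggingface sentence-transformers replicate
#   streamlit run app.py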