"""Streamlit PDF chatbot.

Upload a PDF, split its text into chunks, index the chunks in a FAISS
vector store (HuggingFace embeddings), and answer user questions with a
Replicate-hosted Llama 2 chat model using the most similar chunks as
context.
"""

import os
import sys  # noqa: F401 -- kept for backward compatibility with callers/tools

import faiss  # noqa: F401 -- FAISS backend required by langchain's vector store
import numpy as np  # noqa: F401
import PyPDF2  # PyPDF2 for PDF text extraction
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.llms import Replicate
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# SECURITY NOTE(review): a real API token must never be committed to source
# control -- revoke this one and supply REPLICATE_API_TOKEN via the
# environment. setdefault keeps the old behavior when the variable is unset
# but no longer clobbers an externally configured token.
os.environ.setdefault(
    'REPLICATE_API_TOKEN',
    "r8_TN8tlsE4jjj9WISWhBKx7NqzHLAGwvq3pJOUj",  # You need to use your replicate api
)


def extract_text_with_pypdf2(file):
    """Return the concatenated text of every page of *file*.

    Args:
        file: A binary file-like object (or path) containing a PDF.

    Returns:
        str: All extractable page text, concatenated in page order.
    """
    reader = PyPDF2.PdfReader(file)
    text = ""
    for page in reader.pages:
        # BUG FIX: extract_text() may return None (e.g. image-only pages);
        # concatenating None to str would raise TypeError.
        text += page.extract_text() or ""
    return text


# ---- Streamlit interface -------------------------------------------------
st.title("PDF Chatbot by *** Tasrif Nur Himel ***")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file is not None:
    # Extract text from the PDF using PyPDF2.
    extracted_text = extract_text_with_pypdf2(uploaded_file)

    # Split the text into smaller, overlapping chunks for retrieval.
    text_splitter = CharacterTextSplitter(
        separator=" ",  # space separator gives finer-grained chunk boundaries
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(extracted_text)

    # Embed the chunks and build a FAISS similarity-search index.
    embeddings = HuggingFaceEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)

    # Replicate-hosted Llama 2 13B chat model.
    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        model_kwargs={"temperature": 0.75, "max_length": 3000},
    )

    def qa_chain(query):
        """Answer *query* using the PDF chunks most similar to it.

        Args:
            query: The user's question.

        Returns:
            str: The model's response, grounded in the retrieved chunks.
        """
        docs = document_search.similarity_search(query)
        combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
        combined_prompt += "\n\n".join(doc.page_content for doc in docs)
        return llm.invoke(combined_prompt)

    st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")

    # Initialise chat history exactly once per browser session.
    if 'init' not in st.session_state:
        st.session_state['init'] = True
        st.session_state['chat_history'] = []

    query = st.text_input("Prompt:")
    if st.button("Send"):
        if query.lower() in ["exit", "quit", "q"]:
            st.write('Exiting')
            # BUG FIX: sys.exit() would abort the whole Streamlit server
            # process; st.stop() cleanly halts only this script rerun.
            st.stop()
        result = qa_chain(query)
        st.write('Answer: ' + result)
        st.session_state['chat_history'].append((query, result))

    # Display the chat history on every rerun.
    st.write("### Chat History")
    for past_query, past_answer in st.session_state['chat_history']:
        st.write(f"**You:** {past_query}")
        st.write(f"**Bot:** {past_answer}")
else:
    st.write("Please upload a PDF file.")