# Source: Hugging Face Space by himel06 — commit "Update app.py" (4a6fc54, verified)
import os
import sys
import streamlit as st
import PyPDF2 # PyPDF2 for PDF text extraction
import faiss # FAISS for similarity search
import numpy as np
from langchain_community.llms import Replicate
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
# Replicate API token.
# SECURITY: never commit a real token to source control — the token that was
# previously hardcoded here is exposed and should be revoked on replicate.com.
# Supply the token via the REPLICATE_API_TOKEN environment variable (or the
# Space's secrets settings); the placeholder below is only used if unset.
os.environ.setdefault('REPLICATE_API_TOKEN', '<your-replicate-api-token>')
def extract_text_with_pypdf2(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: A file-like object (e.g. a Streamlit ``UploadedFile``) that
            ``PyPDF2.PdfReader`` can read.

    Returns:
        str: The text of all pages joined together. Pages with no
        extractable text (e.g. scanned images) contribute an empty string.
    """
    reader = PyPDF2.PdfReader(file)
    # extract_text() may return None for image-only pages; coerce to "" so
    # the join does not raise TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
# --- Streamlit interface ----------------------------------------------------
st.title("PDF Chatbot by *** Tasrif Nur Himel ***")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file is not None:
    # Extract the raw text from the uploaded PDF.
    extracted_text = extract_text_with_pypdf2(uploaded_file)

    # Split the text into smaller overlapping chunks for retrieval.
    text_splitter = CharacterTextSplitter(
        separator=" ",  # split on spaces for finer-grained chunks
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(extracted_text)

    # Embed each chunk and index the vectors in a FAISS store.
    embeddings = HuggingFaceEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)

    # Llama-2 13B chat model hosted on Replicate.
    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        model_kwargs={"temperature": 0.75, "max_length": 3000},
    )

    def qa_chain(query):
        """Answer *query* using the most similar PDF chunks as context."""
        docs = document_search.similarity_search(query)
        combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
        combined_prompt += "\n\n".join(doc.page_content for doc in docs)
        return llm.invoke(combined_prompt)

    st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")

    # Initialise the per-session chat history exactly once.
    if 'chat_history' not in st.session_state:
        st.session_state['chat_history'] = []

    query = st.text_input("Prompt:")
    if st.button("Send"):
        if query.lower() in ["exit", "quit", "q"]:
            st.write('Exiting')
            # st.stop() halts this script run only; sys.exit() would kill
            # the whole Streamlit server process.
            st.stop()
        elif query.strip():  # ignore empty prompts instead of querying the LLM
            result = qa_chain(query)
            st.write('Answer: ' + result)
            st.session_state['chat_history'].append((query, result))

    # Display the chat history (distinct loop variable so the current
    # `query` input is not clobbered).
    st.write("### Chat History")
    for past_query, answer in st.session_state['chat_history']:
        st.write(f"**You:** {past_query}")
        st.write(f"**Bot:** {answer}")
else:
    st.write("Please upload a PDF file.")