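# PDF Chatbot: upload a PDF, split and embed its text, index the chunks in a
# FAISS vector store, and answer questions with a Llama 2 model on Replicate.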
import os
import streamlit as st
import PyPDF2  # PyPDF2 for PDF text extraction
from langchain_community.llms import Replicate
from langchain_community.vectorstores import FAISS  # FAISS-backed vector store for similarity search
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

# Replicate API token -- supply your own; prefer reading it from the environment
# rather than committing a real token to source (the value below is a placeholder)
os.environ['REPLICATE_API_TOKEN'] = "<your-replicate-api-token>"

def extract_text_with_pypdf2(file):
    reader = PyPDF2.PdfReader(file)
    text = ""
    for page in reader.pages:
        # extract_text() can return None on image-only pages
        text += page.extract_text() or ""
    return text
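# Example (hypothetical file handle): extract_text_with_pypdf2(open("sample.pdf", "rb"))
# would return the concatenated text of every page in the PDF.
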
# Streamlit interface
st.title("PDF Chatbot by *** Tasrif Nur Himel ***")
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file is not None:
    # Extract text from the PDF using PyPDF2
    extracted_text = extract_text_with_pypdf2(uploaded_file)

    # Split the text into smaller chunks for processing
    text_splitter = CharacterTextSplitter(
        separator=" ",  # Using space as a separator for more control
        chunk_size=800,
        chunk_overlap=200,
        length_function=len
    )
    texts = text_splitter.split_text(extracted_text)
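    # With chunk_size=800 and chunk_overlap=200, consecutive chunks share up to
    # 200 characters of text, so passages that straddle a chunk boundary are
    # still retrievable as a unit.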

    # Use HuggingFace embeddings to transform text chunks into numerical vectors
    embeddings = HuggingFaceEmbeddings()

    # Create a FAISS vector store from the chunks and embeddings
    document_search = FAISS.from_texts(texts, embeddings)
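    # similarity_search() later embeds each query with the same model and
    # returns the chunks whose vectors lie nearest in the FAISS index.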

    # Initialize the Llama 2 model hosted on Replicate
    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        model_kwargs={"temperature": 0.75, "max_length": 3000}
    )
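    # temperature=0.75 allows some variety in answers, and max_length bounds the
    # model's output (parameter names follow this Replicate model's schema).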

    # Retrieval-augmented QA: retrieve similar chunks, then prompt the LLM with them
    def qa_chain(query):
        # Search for the chunks most similar to the query
        docs = document_search.similarity_search(query)
        # Build a combined prompt from the query and the retrieved chunks
        combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
        combined_prompt += "\n\n".join([doc.page_content for doc in docs])
        # Get the response from the Llama model
        response = llm.invoke(combined_prompt)
        return response
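    # A sketch of an alternative, assuming LangChain's legacy chain API is
    # available in the installed version: ConversationalRetrievalChain could
    # replace the hand-rolled prompt assembly above, e.g.
    #   from langchain.chains import ConversationalRetrievalChain
    #   qa = ConversationalRetrievalChain.from_llm(
    #       llm, retriever=document_search.as_retriever())
    #   result = qa({"question": query, "chat_history": chat_history})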
st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")
chat_history = st.session_state.get('chat_history', [])
if 'init' not in st.session_state:
st.session_state['init'] = True
st.session_state['chat_history'] = []
query = st.text_input("Prompt:")
if st.button("Send"):
if query.lower() in ["exit", "quit", "q"]:
st.write('Exiting')
sys.exit()
result = qa_chain(query)
st.write('Answer: ' + result)
st.session_state['chat_history'].append((query, result))
    # Display the chat history
    st.write("### Chat History")
    for query, answer in st.session_state['chat_history']:
        st.write(f"**You:** {query}")
        st.write(f"**Bot:** {answer}")
else:
    st.write("Please upload a PDF file.")