# Source: Hugging Face Space by himel06 — commit "Update app.py" (4a6fc54, verified)
import os
import sys
import streamlit as st
import PyPDF2 # PyPDF2 for PDF text extraction
import faiss # FAISS for similarity search
import numpy as np
from langchain_community.llms import Replicate
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
# Replicate API token.
# SECURITY: never commit a real token to source control — the token that was
# previously hardcoded here is exposed and should be revoked on replicate.com.
# Supply the token via the REPLICATE_API_TOKEN environment variable (or the
# Space's secrets settings); the placeholder below is only used if unset.
os.environ.setdefault('REPLICATE_API_TOKEN', '<your-replicate-api-token>')
def extract_text_with_pypdf2(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: A file-like object (e.g. a Streamlit ``UploadedFile``) that
            ``PyPDF2.PdfReader`` can read.

    Returns:
        str: The text of all pages joined together. Pages with no
        extractable text (e.g. scanned images) contribute an empty string.
    """
    reader = PyPDF2.PdfReader(file)
    # extract_text() may return None for image-only pages; coerce to "" so
    # the join does not raise TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
# --- Streamlit interface ----------------------------------------------------
st.title("PDF Chatbot by *** Tasrif Nur Himel ***")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file is not None:
    # Extract the raw text from the uploaded PDF.
    extracted_text = extract_text_with_pypdf2(uploaded_file)

    # Split the text into smaller overlapping chunks for retrieval.
    text_splitter = CharacterTextSplitter(
        separator=" ",  # split on spaces for finer-grained chunks
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(extracted_text)

    # Embed each chunk and index the vectors in a FAISS store.
    embeddings = HuggingFaceEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)

    # Llama-2 13B chat model hosted on Replicate.
    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        model_kwargs={"temperature": 0.75, "max_length": 3000},
    )

    def qa_chain(query):
        """Answer *query* using the most similar PDF chunks as context."""
        docs = document_search.similarity_search(query)
        combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
        combined_prompt += "\n\n".join(doc.page_content for doc in docs)
        return llm.invoke(combined_prompt)

    st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")

    # Initialise the per-session chat history exactly once.
    if 'chat_history' not in st.session_state:
        st.session_state['chat_history'] = []

    query = st.text_input("Prompt:")
    if st.button("Send"):
        if query.lower() in ["exit", "quit", "q"]:
            st.write('Exiting')
            # st.stop() halts this script run only; sys.exit() would kill
            # the whole Streamlit server process.
            st.stop()
        elif query.strip():  # ignore empty prompts instead of querying the LLM
            result = qa_chain(query)
            st.write('Answer: ' + result)
            st.session_state['chat_history'].append((query, result))

    # Display the chat history (distinct loop variable so the current
    # `query` input is not clobbered).
    st.write("### Chat History")
    for past_query, answer in st.session_state['chat_history']:
        st.write(f"**You:** {past_query}")
        st.write(f"**Bot:** {answer}")
else:
    st.write("Please upload a PDF file.")