Spaces:
Runtime error
Runtime error
Python File
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import streamlit as st
|
| 4 |
-
|
| 5 |
import faiss # FAISS for similarity search
|
| 6 |
import numpy as np
|
| 7 |
from langchain_community.llms import Replicate
|
|
@@ -13,7 +13,7 @@ from langchain_huggingface import HuggingFaceEmbeddings
|
|
| 13 |
os.environ['REPLICATE_API_TOKEN'] = "r8_TN8tlsE4jjj9WISWhBKx7NqzHLAGwvq3pJOUj"
|
| 14 |
|
| 15 |
def extract_text_with_pypdf2(file):
|
| 16 |
-
reader = PdfReader(file)
|
| 17 |
text = ""
|
| 18 |
for page in reader.pages:
|
| 19 |
text += page.extract_text()
|
|
@@ -29,7 +29,7 @@ if uploaded_file is not None:
|
|
| 29 |
|
| 30 |
# Split the text into smaller chunks for processing
|
| 31 |
text_splitter = CharacterTextSplitter(
|
| 32 |
-
separator="
|
| 33 |
chunk_size=800,
|
| 34 |
chunk_overlap=200,
|
| 35 |
length_function=len
|
|
@@ -56,7 +56,7 @@ if uploaded_file is not None:
|
|
| 56 |
combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
|
| 57 |
combined_prompt += "\n\n".join([doc.page_content for doc in docs])
|
| 58 |
# Get the response from the Llama model
|
| 59 |
-
response = llm(combined_prompt)
|
| 60 |
return response
|
| 61 |
|
| 62 |
st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import streamlit as st
|
| 4 |
+
import PyPDF2 # PyPDF2 for PDF text extraction
|
| 5 |
import faiss # FAISS for similarity search
|
| 6 |
import numpy as np
|
| 7 |
from langchain_community.llms import Replicate
|
|
|
|
| 13 |
os.environ['REPLICATE_API_TOKEN'] = "r8_TN8tlsE4jjj9WISWhBKx7NqzHLAGwvq3pJOUj"
|
| 14 |
|
| 15 |
def extract_text_with_pypdf2(file):
|
| 16 |
+
reader = PyPDF2.PdfReader(file)
|
| 17 |
text = ""
|
| 18 |
for page in reader.pages:
|
| 19 |
text += page.extract_text()
|
|
|
|
| 29 |
|
| 30 |
# Split the text into smaller chunks for processing
|
| 31 |
text_splitter = CharacterTextSplitter(
|
| 32 |
+
separator=" ", # Using space as a separator for more control
|
| 33 |
chunk_size=800,
|
| 34 |
chunk_overlap=200,
|
| 35 |
length_function=len
|
|
|
|
| 56 |
combined_prompt = f"Question: {query}\n\nRelevant Documents:\n"
|
| 57 |
combined_prompt += "\n\n".join([doc.page_content for doc in docs])
|
| 58 |
# Get the response from the Llama model
|
| 59 |
+
response = llm.invoke(combined_prompt) # Updated method call
|
| 60 |
return response
|
| 61 |
|
| 62 |
st.write("PDF successfully uploaded and processed. You can now ask questions about its content.")
|