absiitr committed on
Commit
58ee917
·
verified ·
1 Parent(s): 871aaf1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +211 -169
app.py CHANGED
@@ -3,119 +3,116 @@ import tempfile
3
  import gc
4
  import logging
5
  import streamlit as st
6
- from groq import Groq
7
  from langchain_community.document_loaders import PyPDFLoader
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
10
  from langchain_community.vectorstores import Chroma
11
  import torch
12
 
13
- # ---------------- CONFIG ----------------
14
  logging.basicConfig(level=logging.INFO)
15
 
 
16
  GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
17
  GROQ_MODEL = "llama-3.1-8b-instant"
18
 
 
19
  client = None
20
  if GROQ_API_KEY:
21
  try:
22
  client = Groq(api_key=GROQ_API_KEY)
 
23
  except Exception as e:
24
- st.error(e)
 
 
 
25
 
26
- # ---------------- PAGE CONFIG ----------------
27
- st.set_page_config(
28
- page_title="PDF Assistant",
29
- page_icon="📘",
30
- layout="wide"
31
- )
32
 
33
- # ---------------- FORCE DARK UI (THEME INDEPENDENT) ----------------
34
  st.markdown("""
35
  <style>
36
- :root {
37
- --primary-color: #1e3a8a;
38
- --background-color: #0e1117;
39
- --secondary-background-color: #161b22;
40
- --text-color: #f0f2f6;
41
  }
42
-
43
- html, body, [data-testid="stAppViewContainer"], .main {
44
- background-color: #0e1117 !important;
45
- color: #f0f2f6 !important;
46
  }
47
-
48
- /* Hide Streamlit default UI */
49
- header[data-testid="stHeader"], footer {
50
  display: none;
51
  }
52
-
53
- /* Sidebar */
54
  [data-testid="stSidebar"] {
55
- background-color: #161b22 !important;
56
- width: 20rem;
57
  position: fixed;
 
 
58
  height: 100vh;
59
- overflow-y: auto;
 
 
60
  }
61
-
62
- /* Fixed header */
 
 
 
 
 
 
63
  .fixed-header {
64
  position: fixed;
65
  top: 0;
66
  left: 0;
67
  width: 100%;
68
  height: 6rem;
69
- background: #0e1117;
70
- z-index: 9999;
71
  display: flex;
72
  flex-direction: column;
73
- align-items: center;
74
  justify-content: center;
75
- border-bottom: 1px solid rgba(255,255,255,0.1);
76
- }
77
-
78
- .title-text {
79
- font-size: 2.5rem;
80
- font-weight: 800;
81
- }
82
-
83
- .creator-text {
84
- font-size: 1rem;
85
- color: #9ca3af;
86
- }
87
-
88
- .creator-text a {
89
- color: #4da6ff;
90
- text-decoration: none;
91
  }
92
-
93
- /* Main content spacing */
94
  .main .block-container {
95
  margin-top: 6rem;
 
 
 
96
  padding-bottom: 5rem;
97
  }
98
-
99
- /* Inputs */
100
- input, textarea {
101
- background-color: #1f2933 !important;
102
- color: #f0f2f6 !important;
103
- border: 1px solid #374151 !important;
104
  }
105
-
106
- /* Buttons */
107
- button {
108
- background-color: #1e3a8a !important;
109
- color: white !important;
110
- border-radius: 8px !important;
 
 
111
  font-weight: 600;
 
112
  }
113
-
114
- button:hover {
115
- background-color: #2563eb !important;
116
  }
117
-
118
- /* Chat bubbles */
 
 
 
 
 
119
  .chat-user {
120
  background: #2d3748;
121
  padding: 12px;
@@ -123,48 +120,62 @@ button:hover {
123
  margin: 6px 0 6px auto;
124
  max-width: 85%;
125
  text-align: right;
 
126
  }
127
-
128
  .chat-bot {
129
  background: #1e3a8a;
130
  padding: 12px;
131
  border-radius: 10px 10px 10px 2px;
132
  margin: 6px auto 6px 0;
133
  max-width: 85%;
 
 
134
  }
135
-
136
- /* Hide file list */
137
- [data-testid="stFileUploaderFile"],
138
- section[data-testid="stFileUploader"] ul,
139
- section[data-testid="stFileUploader"] small {
140
- display: none;
141
  }
142
-
143
- /* Scrollbar */
144
- *::-webkit-scrollbar {
145
- width: 8px;
 
 
146
  }
147
- *::-webkit-scrollbar-thumb {
148
- background: #2d3748;
149
- border-radius: 4px;
 
 
 
 
 
 
 
 
 
 
150
  }
151
  </style>
152
  """, unsafe_allow_html=True)
153
 
154
- # ---------------- HEADER ----------------
155
  st.markdown("""
156
  <div class="fixed-header">
157
  <div class="title-text">📘 PDF Assistant</div>
158
  <div class="creator-text">
159
- by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">
160
- Abhishek Saxena</a>
161
  </div>
162
  </div>
163
  """, unsafe_allow_html=True)
164
 
 
165
  # ---------------- SESSION STATE ----------------
166
- if "messages" not in st.session_state:
167
- st.session_state.messages = []
 
 
 
168
 
169
  if "retriever" not in st.session_state:
170
  st.session_state.retriever = None
@@ -176,10 +187,11 @@ if "uploader_key" not in st.session_state:
176
  st.session_state.uploader_key = 0
177
 
178
  # ---------------- FUNCTIONS ----------------
179
- def clear_chat():
180
- st.session_state.messages = []
181
 
182
  def clear_memory():
 
183
  st.session_state.retriever = None
184
  st.session_state.uploaded_file_name = None
185
  st.session_state.uploader_key += 1
@@ -187,102 +199,132 @@ def clear_memory():
187
  if torch.cuda.is_available():
188
  torch.cuda.empty_cache()
189
 
190
- def process_pdf(uploaded):
191
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
192
- tmp.write(uploaded.getvalue())
193
- path = tmp.name
194
-
195
- loader = PyPDFLoader(path)
196
- docs = loader.load()
197
-
198
- splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
199
- chunks = splitter.split_documents(docs)
200
-
201
- embeddings = HuggingFaceEmbeddings(
202
- model_name="sentence-transformers/all-MiniLM-L6-v2",
203
- model_kwargs={"device": "cpu"},
204
- encode_kwargs={"normalize_embeddings": True}
205
- )
206
-
207
- vectorstore = Chroma.from_documents(chunks, embeddings)
208
- st.session_state.retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
209
- os.unlink(path)
210
-
211
- def ask(question):
212
- docs = st.session_state.retriever.invoke(question)
213
- context = "\n\n".join(d.page_content for d in docs)
214
-
215
- prompt = f"""
216
- You are a strict RAG assistant.
217
- Use ONLY the PDF content.
218
- If the answer is not found, say:
219
- "I cannot find this in the PDF."
220
-
221
- CONTEXT:
222
- {context}
223
-
224
- QUESTION:
225
- {question}
 
 
 
 
 
 
 
 
 
 
 
 
226
  """
227
-
228
- res = client.chat.completions.create(
229
- model=GROQ_MODEL,
230
- messages=[{"role": "user", "content": prompt}],
231
- temperature=0.1
232
- )
233
- return res.choices[0].message.content.strip()
 
 
 
 
 
234
 
235
  # ---------------- SIDEBAR ----------------
236
  with st.sidebar:
 
237
  if st.button("🗑️ Clear Chat History", use_container_width=True):
238
- clear_chat()
239
- if st.button("🔥 Clear PDF Memory", use_container_width=True):
240
- clear_memory()
241
- st.success("Memory cleared")
242
 
243
  st.markdown("---")
244
-
 
245
  uploaded = st.file_uploader(
246
- "Upload PDF",
247
- type=["pdf"],
248
- key=st.session_state.uploader_key,
249
- label_visibility="collapsed"
250
  )
251
 
252
- if uploaded and uploaded.name != st.session_state.uploaded_file_name:
253
- with st.spinner("Processing PDF..."):
254
- process_pdf(uploaded)
255
- st.session_state.uploaded_file_name = uploaded.name
256
- st.session_state.messages = []
257
- st.success("PDF ready")
258
-
259
- if not uploaded:
260
- st.warning("⬆️ Upload a PDF to start chatting")
 
 
 
 
 
 
261
 
262
- # ---------------- INPUT ----------------
263
- disabled = not st.session_state.uploaded_file_name or not client
264
 
265
- with st.form(key="chat_form", clear_on_submit=True):
266
- col1, col2 = st.columns([0.85, 0.15])
267
- with col1:
268
- question = st.text_input(
269
- "",
 
270
  placeholder="Ask a question about the loaded PDF...",
271
- disabled=disabled
 
272
  )
273
- with col2:
274
- send = st.form_submit_button("➤", disabled=disabled)
275
 
276
- if send and question:
277
- st.session_state.messages.append(("user", question))
278
  with st.spinner("Thinking..."):
279
- answer = ask(question)
280
- st.session_state.messages.append(("bot", answer))
 
 
 
 
 
281
  st.rerun()
282
 
283
- # ---------------- CHAT HISTORY ----------------
284
- for role, msg in reversed(st.session_state.messages):
285
- if role == "user":
286
- st.markdown(f"<div class='chat-user'>{msg}</div>", unsafe_allow_html=True)
287
- else:
288
- st.markdown(f"<div class='chat-bot'>{msg}</div>", unsafe_allow_html=True)
 
 
 
3
  import gc
4
  import logging
5
  import streamlit as st
6
+ from groq import Groq, APIError
7
  from langchain_community.document_loaders import PyPDFLoader
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
10
  from langchain_community.vectorstores import Chroma
11
  import torch
12
 
13
# ---------------- CONFIGURATION ----------------
logging.basicConfig(level=logging.INFO)

# ---------------- STREAMLIT UI SETUP ----------------
# Page config is applied before anything else touches the UI: the client
# setup below may call st.error / st.warning, and Streamlit documents
# set_page_config as having to be the first Streamlit command on a page.
st.set_page_config(page_title="PDF Assistant", page_icon="📘", layout="wide")

# Load API key from Hugging Face secrets, falling back to the environment.
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
GROQ_MODEL = "llama-3.1-8b-instant"

# Initialize Groq client. `client` stays None when no key is configured or
# when construction fails; the rest of the script tests it before use.
client = None
if GROQ_API_KEY:
    try:
        client = Groq(api_key=GROQ_API_KEY)
        logging.info("✅ Groq client initialized successfully.")
    except Exception as e:
        st.error(f"❌ Failed to initialize Groq client: {e}")
        client = None
else:
    st.warning("⚠️ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
 
 
 
 
34
 
35
+ # ---------------- CSS ----------------
36
  st.markdown("""
37
  <style>
38
+ /* 1. GLOBAL RESET & SCROLL LOCK */
39
+ html, body {
40
+ overflow: hidden;
41
+ height: 100%;
42
+ margin: 0;
43
  }
44
+ /* 2. HIDE DEFAULT STREAMLIT ELEMENTS */
45
+ header[data-testid="stHeader"] {
46
+ display: none;
 
47
  }
48
+ footer {
 
 
49
  display: none;
50
  }
51
+ /* 3. SIDEBAR STYLING (INDEPENDENT LEFT PANEL SCROLL) */
 
52
  [data-testid="stSidebar"] {
 
 
53
  position: fixed;
54
+ top: 0;
55
+ left: 0;
56
  height: 100vh;
57
+ width: 20rem;
58
+ overflow-y: auto !important;
59
+ z-index: 99999;
60
  }
61
+ [data-testid="stSidebar"]::-webkit-scrollbar {
62
+ width: 6px;
63
+ }
64
+ [data-testid="stSidebar"]::-webkit-scrollbar-thumb {
65
+ background: #2d3748;
66
+ border-radius: 3px;
67
+ }
68
+ /* 4. FIXED HEADER STYLING */
69
  .fixed-header {
70
  position: fixed;
71
  top: 0;
72
  left: 0;
73
  width: 100%;
74
  height: 6rem;
75
+ background-color: #0e1117;
76
+ z-index: 99998;
77
  display: flex;
78
  flex-direction: column;
 
79
  justify-content: center;
80
+ align-items: center;
81
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
+ /* 5. MAIN CONTENT SCROLLING (INDEPENDENT RIGHT PANEL SCROLL) */
 
84
  .main .block-container {
85
  margin-top: 6rem;
86
+ height: calc(100vh - 6rem);
87
+ overflow-y: auto;
88
+ padding-top: 1rem;
89
  padding-bottom: 5rem;
90
  }
91
+ .main .block-container::-webkit-scrollbar {
92
+ width: 8px;
 
 
 
 
93
  }
94
+ .main .block-container::-webkit-scrollbar-thumb {
95
+ background: #2d3748;
96
+ border-radius: 4px;
97
+ }
98
+ /* 6. SIDEBAR BUTTON STYLING */
99
+ [data-testid="stSidebar"] .stButton button {
100
+ width: 100%;
101
+ border-radius: 8px;
102
  font-weight: 600;
103
+ margin-bottom: 6px;
104
  }
105
+ /* 7. HIDE UPLOADED FILE LIST & NAME */
106
+ [data-testid='stFileUploaderFile'] {
107
+ display: none;
108
  }
109
+ section[data-testid="stFileUploader"] ul {
110
+ display: none;
111
+ }
112
+ section[data-testid="stFileUploader"] small {
113
+ display: none;
114
+ }
115
+ /* 8. CHAT BUBBLES */
116
  .chat-user {
117
  background: #2d3748;
118
  padding: 12px;
 
120
  margin: 6px 0 6px auto;
121
  max-width: 85%;
122
  text-align: right;
123
+ color: #f0f2f6;
124
  }
 
125
  .chat-bot {
126
  background: #1e3a8a;
127
  padding: 12px;
128
  border-radius: 10px 10px 10px 2px;
129
  margin: 6px auto 6px 0;
130
  max-width: 85%;
131
+ text-align: left;
132
+ color: #ffffff;
133
  }
134
+ /* Sources CSS removed/hidden as it is no longer used */
135
+ .sources {
136
+ display: none;
 
 
 
137
  }
138
+ /* 9. TITLE TEXT */
139
+ .title-text {
140
+ font-size: 2.5rem;
141
+ font-weight: 800;
142
+ margin: 0;
143
+ line-height: 1.2;
144
  }
145
+ .creator-text {
146
+ font-size: 1rem;
147
+ font-weight: 500;
148
+ color: #cccccc;
149
+ }
150
+ .creator-text a {
151
+ color: #4da6ff;
152
+ text-decoration: none;
153
+ }
154
+ /* 10. INPUT FORM STYLING */
155
+ [data-testid="stForm"] {
156
+ border: none;
157
+ padding: 0;
158
  }
159
  </style>
160
  """, unsafe_allow_html=True)
161
 
162
# ---------------- FIXED HEADER ----------------
# Static banner markup. The fixed-header / title-text / creator-text classes
# are the ones styled by the CSS block injected earlier in this script.
st.markdown("""
<div class="fixed-header">
<div class="title-text">📘 PDF Assistant</div>
<div class="creator-text">
by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>
</div>
</div>
""", unsafe_allow_html=True)
171
 
172
+
173
  # ---------------- SESSION STATE ----------------
174
+ if "chat" not in st.session_state:
175
+ st.session_state.chat = []
176
+
177
+ if "vectorstore" not in st.session_state:
178
+ st.session_state.vectorstore = None
179
 
180
  if "retriever" not in st.session_state:
181
  st.session_state.retriever = None
 
187
  st.session_state.uploader_key = 0
188
 
189
  # ---------------- FUNCTIONS ----------------
190
def clear_chat_history():
    """Reset the conversation stored in the session to an empty list."""
    st.session_state["chat"] = []
192
 
193
  def clear_memory():
194
+ st.session_state.vectorstore = None
195
  st.session_state.retriever = None
196
  st.session_state.uploaded_file_name = None
197
  st.session_state.uploader_key += 1
 
199
  if torch.cuda.is_available():
200
  torch.cuda.empty_cache()
201
 
202
def process_pdf(uploaded_file):
    """Index an uploaded PDF and store the result in the session.

    The upload is written to a temporary ``.pdf`` file so that PyPDFLoader
    can be given a filesystem path. The loaded pages are split into chunks,
    embedded, and stored in a Chroma vector store. On success both the
    vector store and a retriever (k=5) are saved in ``st.session_state``.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the PDF
            bytes (the value returned by ``st.file_uploader``).

    Returns:
        The number of chunks indexed, or ``None`` if any step failed (the
        error is shown with ``st.error`` and logged).
    """
    path = None
    try:
        # delete=False: the file must outlive the `with` so the loader can
        # reopen it by name; it is removed in the `finally` below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getvalue())
            path = tmp.name

        docs = PyPDFLoader(path).load()

        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
        chunks = splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )

        vectorstore = Chroma.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

        st.session_state.vectorstore = vectorstore
        st.session_state.retriever = retriever

        return len(chunks)
    except Exception as e:
        logging.exception("PDF processing failed")
        st.error(f"Error processing PDF: {str(e)}")
        return None
    finally:
        # Remove the temp file on every path, including failures, so a bad
        # PDF does not leave a copy of the upload behind.
        if path is not None:
            try:
                os.unlink(path)
            except OSError:
                logging.warning("Could not remove temporary file %s", path)
233
+
234
def ask_question(question):
    """Answer *question* from the indexed PDF via the Groq chat model.

    Retrieves chunks with the session retriever, joins their text into a
    context string, and sends it with the question to ``GROQ_MODEL``.

    Args:
        question: The user's question as a string.

    Returns:
        A 3-tuple ``(answer, n_chunks, error)``. On success ``answer`` is
        the stripped model reply, ``n_chunks`` the number of retrieved
        chunks, and ``error`` is ``None``. On any failure the tuple is
        ``(None, 0, message)``; exceptions are caught, logged, and reported
        through ``message`` rather than raised.
    """
    if not client:
        return None, 0, "Groq client is not initialized."
    if not st.session_state.retriever:
        return None, 0, "Upload PDF first."
    if not question or not question.strip():
        # Nothing to retrieve or answer for a blank question.
        return None, 0, "Please enter a question."

    try:
        docs = st.session_state.retriever.invoke(question)
        context = "\n\n".join(d.page_content for d in docs)

        prompt = f"""
        You are a strict RAG Q&A assistant.
        Summarize the context based on user question and return best answer. If the answer is not found, reply: "I cannot find this in the PDF."
        CONTEXT = {context}
        QUESTION = {question}
        Answer on your behalf, don't say based on the context...
        """

        response = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": "Use only the PDF content."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
        )

        # The reply text may be missing or blank. Report that explicitly so
        # the caller never gets a falsy answer paired with a None error.
        content = response.choices[0].message.content
        answer = (content or "").strip()
        if not answer:
            return None, 0, "The model returned an empty response."
        return answer, len(docs), None
    except Exception as e:
        logging.exception("Question answering failed")
        return None, 0, f"Error: {str(e)}"
263
 
264
  # ---------------- SIDEBAR ----------------
265
  with st.sidebar:
266
+ st.write("")
267
  if st.button("πŸ—‘οΈ Clear Chat History", use_container_width=True):
268
+ clear_chat_history()
269
+ if st.button("πŸ”₯ Clear PDF Memory", on_click=clear_memory, use_container_width=True):
270
+ st.success("Memory Cleared!")
 
271
 
272
  st.markdown("---")
273
+
274
+ upload_label = "βœ… PDF Uploaded!" if st.session_state.uploaded_file_name else "Upload PDF"
275
  uploaded = st.file_uploader(
276
+ upload_label, type=["pdf"], key=st.session_state.uploader_key, label_visibility="collapsed"
 
 
 
277
  )
278
 
279
+ if uploaded:
280
+ if uploaded.name != st.session_state.uploaded_file_name:
281
+ st.session_state.uploaded_file_name = None
282
+ st.session_state.chat = []
283
+ with st.spinner(f"Processing '{uploaded.name}'..."):
284
+ chunks = process_pdf(uploaded)
285
+ if chunks:
286
+ st.session_state.uploaded_file_name = uploaded.name
287
+ st.success("βœ… PDF Processed!")
288
+ else:
289
+ st.error("❌ Failed.")
290
+ else:
291
+ st.success(f"βœ… **Active:** `{uploaded.name}`")
292
+ else:
293
+ st.warning("⬆️ Upload a PDF to start chatting!")
294
 
295
# ---------------- INPUT AREA ----------------
# The form stays disabled until a PDF has been processed and a Groq client
# exists.
disabled_input = st.session_state.uploaded_file_name is None or client is None

# Input Form
with st.form(key='chat_form', clear_on_submit=True):
    col_input, col_btn = st.columns([0.85, 0.15], gap="small")
    with col_input:
        user_question = st.text_input(
            "Ask a question",
            placeholder="Ask a question about the loaded PDF...",
            label_visibility="collapsed",
            disabled=disabled_input
        )
    with col_btn:
        submit_btn = st.form_submit_button("➤", disabled=disabled_input, use_container_width=True)

# Trim the input so a whitespace-only submission is ignored instead of being
# recorded and sent to the model.
user_question = (user_question or "").strip()

if submit_btn and user_question:
    st.session_state.chat.append(("user", user_question))
    with st.spinner("Thinking..."):
        # The middle element (retrieved-chunk count) is not displayed.
        answer, _, error = ask_question(user_question)
    if answer:
        st.session_state.chat.append(("bot", answer))
    else:
        st.session_state.chat.append(("bot", f"🔴 **Error:** {error}"))
    # Rerun so the updated history is rendered.
    st.rerun()
322
 
323
# ---------------- CHAT HISTORY (REVERSED) ----------------
# Imported here so this section is self-contained; only the renderer needs it.
from html import escape

if st.session_state.chat:
    st.markdown("---")
    # Newest message first.
    for role, msg in reversed(st.session_state.chat):
        bubble_class = "chat-user" if role == "user" else "chat-bot"
        # Messages contain user input and model output. Escape them before
        # placing them in markup rendered with unsafe_allow_html, so stray
        # "<", ">" or "&" cannot inject tags or break the bubble.
        st.markdown(
            f"<div class='{bubble_class}'>{escape(msg)}</div>",
            unsafe_allow_html=True,
        )