absiitr committed on
Commit
06f5408
·
verified ·
1 Parent(s): 71d2c45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -257
app.py CHANGED
@@ -3,175 +3,121 @@ import tempfile
3
  import gc
4
  import logging
5
  import streamlit as st
6
- from groq import Groq, APIError
7
  from langchain_community.document_loaders import PyPDFLoader
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
10
  from langchain_community.vectorstores import Chroma
11
  import torch
12
 
13
- # ---------------- CONFIGURATION ----------------
14
  logging.basicConfig(level=logging.INFO)
15
 
16
- # Load API key from Hugging Face secrets
17
  GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
18
  GROQ_MODEL = "llama-3.1-8b-instant"
19
 
20
- # Initialize Groq client
21
  client = None
22
  if GROQ_API_KEY:
23
  try:
24
  client = Groq(api_key=GROQ_API_KEY)
25
- logging.info("✅ Groq client initialized successfully.")
26
  except Exception as e:
27
- st.error(f"❌ Failed to initialize Groq client: {e}")
28
- client = None
29
- else:
30
- st.warning("⚠️ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
31
 
32
- # ---------------- STREAMLIT UI SETUP ----------------
33
  st.set_page_config(
34
- page_title="PDF Assistant",
35
- page_icon="📘",
36
- layout="wide",
37
- initial_sidebar_state="expanded"
38
  )
39
 
40
- # ---------------- CSS ----------------
41
  st.markdown("""
42
  <style>
43
- /* 0. FORCE DARK THEME (OVERRIDES LIGHT MODE) */
44
- [data-testid="stAppViewContainer"] {
45
- background-color: #0e1117;
46
- color: #fafafa;
47
- }
48
- [data-testid="stSidebar"] {
49
- background-color: #262730;
50
- color: #fafafa;
51
- }
52
- /* Force text colors for standard elements */
53
- p, h1, h2, h3, h4, h5, h6, li, span, div, label {
54
- color: #fafafa !important;
55
- }
56
 
57
- /* 1. GLOBAL RESET & SCROLL LOCK */
58
- html, body {
59
- overflow: hidden;
60
- height: 100%;
61
- margin: 0;
62
- background-color: #0e1117 !important;
63
  }
64
 
65
- /* 2. HIDE DEFAULT STREAMLIT ELEMENTS & SIDEBAR TOGGLES */
66
- header[data-testid="stHeader"] {
67
- display: none;
68
- }
69
- footer {
70
- display: none;
71
  }
72
 
73
- /* --- HIDE SIDEBAR TOGGLE BUTTONS --- */
74
- section[data-testid="stSidebar"] > div > div:first-child {
75
- display: none;
76
- }
77
- [data-testid="collapsedControl"] {
78
  display: none;
79
  }
80
 
81
- /* 3. SIDEBAR STYLING */
82
  [data-testid="stSidebar"] {
 
 
83
  position: fixed;
84
- top: 0;
85
- left: 0;
86
  height: 100vh;
87
- width: 20rem;
88
- overflow-y: auto !important;
89
- z-index: 99999;
90
- }
91
- [data-testid="stSidebar"]::-webkit-scrollbar {
92
- width: 6px;
93
- }
94
- [data-testid="stSidebar"]::-webkit-scrollbar-thumb {
95
- background: #2d3748;
96
- border-radius: 3px;
97
  }
98
 
99
- /* 4. FIXED HEADER STYLING */
100
  .fixed-header {
101
  position: fixed;
102
  top: 0;
103
  left: 0;
104
  width: 100%;
105
  height: 6rem;
106
- background-color: #0e1117;
107
- z-index: 99998;
108
  display: flex;
109
  flex-direction: column;
110
- justify-content: center;
111
  align-items: center;
112
- border-bottom: 1px solid rgba(255, 255, 255, 0.1);
 
113
  }
114
 
115
- /* 5. MAIN CONTENT SCROLLING */
116
- .main .block-container {
117
- margin-top: 6rem;
118
- height: calc(100vh - 6rem);
119
- overflow-y: auto;
120
- padding-top: 1rem;
121
- padding-bottom: 5rem;
122
- }
123
- .main .block-container::-webkit-scrollbar {
124
- width: 8px;
125
- }
126
- .main .block-container::-webkit-scrollbar-thumb {
127
- background: #2d3748;
128
- border-radius: 4px;
129
  }
130
 
131
- /* 6. SIDEBAR BUTTON STYLING */
132
- [data-testid="stSidebar"] .stButton button {
133
- width: 100%;
134
- border-radius: 8px;
135
- font-weight: 600;
136
- margin-bottom: 6px;
137
- background-color: #2d3748;
138
- color: white;
139
- border: 1px solid #4a5568;
140
- }
141
- [data-testid="stSidebar"] .stButton button:hover {
142
- background-color: #4a5568;
143
- border-color: #cbd5e0;
144
- color: white;
145
  }
146
 
147
- /* 7. UPLOADED FILE FIXES (FORCE DARK DROPDOWN) */
148
- /* This fixes the white box issue seen in your screenshot */
149
- [data-testid="stFileUploader"] {
150
- background-color: #262730; /* Dark background matching sidebar */
151
- border-radius: 10px;
152
- padding: 10px;
153
  }
154
- [data-testid="stFileUploader"] section {
155
- background-color: #262730 !important;
156
- color: #fafafa !important;
 
 
157
  }
158
- [data-testid="stFileUploader"] button {
159
- color: #fafafa !important; /* Browse files button text */
160
- border-color: #4a5568 !important;
 
 
 
161
  }
162
- [data-testid="stFileUploader"] .uploadedFile {
163
- background-color: #2d3748 !important;
 
 
164
  color: white !important;
 
 
165
  }
166
- /* Hiding file list details if needed, but keeping the uploader visible */
167
- section[data-testid="stFileUploader"] ul {
168
- display: none;
169
- }
170
- section[data-testid="stFileUploader"] small {
171
- display: none;
172
  }
173
 
174
- /* 8. CHAT BUBBLES */
175
  .chat-user {
176
  background: #2d3748;
177
  padding: 12px;
@@ -179,52 +125,36 @@ section[data-testid="stFileUploader"] small {
179
  margin: 6px 0 6px auto;
180
  max-width: 85%;
181
  text-align: right;
182
- color: #f0f2f6;
183
  }
 
184
  .chat-bot {
185
  background: #1e3a8a;
186
  padding: 12px;
187
  border-radius: 10px 10px 10px 2px;
188
  margin: 6px auto 6px 0;
189
  max-width: 85%;
190
- text-align: left;
191
- color: #ffffff;
192
- }
193
- .sources {
194
- display: none;
195
  }
196
 
197
- /* 9. TITLE TEXT */
198
- .title-text {
199
- font-size: 2.5rem;
200
- font-weight: 800;
201
- margin: 0;
202
- line-height: 1.2;
203
- color: #ffffff !important;
204
- }
205
- .creator-text {
206
- font-size: 1rem;
207
- font-weight: 500;
208
- color: #cccccc !important;
209
- }
210
- .creator-text a {
211
- color: #4da6ff;
212
- text-decoration: none;
213
  }
214
 
215
- /* 10. INPUT FORM STYLING */
216
- [data-testid="stForm"] {
217
- border: none;
218
- padding: 0;
219
  }
220
- [data-testid="stTextInput"] input {
221
- color: white;
222
- background-color: #2d3748;
223
  }
 
224
  </style>
225
  """, unsafe_allow_html=True)
226
 
227
- # ---------------- FIXED HEADER ----------------
228
  st.markdown("""
229
  <div class="fixed-header">
230
  <div class="title-text">📘 PDF Assistant</div>
@@ -237,17 +167,21 @@ st.markdown("""
237
  # ---------------- SESSION STATE ----------------
238
  if "chat" not in st.session_state:
239
  st.session_state.chat = []
 
240
  if "vectorstore" not in st.session_state:
241
  st.session_state.vectorstore = None
 
242
  if "retriever" not in st.session_state:
243
  st.session_state.retriever = None
 
244
  if "uploaded_file_name" not in st.session_state:
245
  st.session_state.uploaded_file_name = None
 
246
  if "uploader_key" not in st.session_state:
247
  st.session_state.uploader_key = 0
248
 
249
  # ---------------- FUNCTIONS ----------------
250
- def clear_chat_history():
251
  st.session_state.chat = []
252
 
253
  def clear_memory():
@@ -259,133 +193,100 @@ def clear_memory():
259
  if torch.cuda.is_available():
260
  torch.cuda.empty_cache()
261
 
262
- def process_pdf(uploaded_file):
263
- try:
264
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
265
- tmp.write(uploaded_file.getvalue())
266
- path = tmp.name
267
-
268
- loader = PyPDFLoader(path)
269
- docs = loader.load()
270
-
271
- splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
272
- chunks = splitter.split_documents(docs)
273
-
274
- embeddings = HuggingFaceEmbeddings(
275
- model_name="sentence-transformers/all-MiniLM-L6-v2",
276
- model_kwargs={"device": "cpu"},
277
- encode_kwargs={"normalize_embeddings": True}
278
- )
279
-
280
- vectorstore = Chroma.from_documents(chunks, embeddings)
281
- retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
282
-
283
- st.session_state.vectorstore = vectorstore
284
- st.session_state.retriever = retriever
285
-
286
- if os.path.exists(path):
287
- os.unlink(path)
288
-
289
- return len(chunks)
290
- except Exception as e:
291
- st.error(f"Error processing PDF: {str(e)}")
292
- return None
293
-
294
- def ask_question(question):
295
- if not client:
296
- return None, 0, "Groq client is not initialized."
297
- if not st.session_state.retriever:
298
- return None, 0, "Upload PDF first."
299
-
300
- try:
301
- docs = st.session_state.retriever.invoke(question)
302
- context = "\n\n".join(d.page_content for d in docs)
303
-
304
- prompt = f"""You are a strict RAG Q&A assistant.
305
- Summarize the context based on user question and return best answer.
306
- If the answer is not found, reply: "I cannot find this in the PDF."
307
-
308
- CONTEXT = {context}
309
- QUESTION = {question}
310
-
311
- Answer on your behalf, don't say based on the context..."""
312
-
313
- response = client.chat.completions.create(
314
- model=GROQ_MODEL,
315
- messages=[
316
- {"role": "system", "content": "Use only the PDF content."},
317
- {"role": "user", "content": prompt}
318
- ],
319
- temperature=0.1
320
- )
321
- return response.choices[0].message.content.strip(), len(docs), None
322
- except Exception as e:
323
- return None, 0, f"Error: {str(e)}"
324
 
325
  # ---------------- SIDEBAR ----------------
326
  with st.sidebar:
327
- st.write("")
328
  if st.button("🗑️ Clear Chat History", use_container_width=True):
329
- clear_chat_history()
330
-
331
- if st.button("🔥 Clear PDF Memory", on_click=clear_memory, use_container_width=True):
332
- st.success("Memory Cleared!")
333
-
334
  st.markdown("---")
335
-
336
- upload_label = "✅ PDF Uploaded!" if st.session_state.uploaded_file_name else "Upload PDF"
337
  uploaded = st.file_uploader(
338
- upload_label, type=["pdf"], key=st.session_state.uploader_key, label_visibility="collapsed"
 
 
339
  )
340
-
341
- if uploaded:
342
- if uploaded.name != st.session_state.uploaded_file_name:
343
- st.session_state.uploaded_file_name = None
 
344
  st.session_state.chat = []
345
- with st.spinner(f"Processing '{uploaded.name}'..."):
346
- chunks = process_pdf(uploaded)
347
- if chunks:
348
- st.session_state.uploaded_file_name = uploaded.name
349
- st.success("✅ PDF Processed!")
350
- else:
351
- st.error("❌ Failed.")
352
- else:
353
- st.success(f"✅ **Active:** `{uploaded.name}`")
354
- else:
355
- st.warning("⬆️ Upload a PDF to start chatting!")
356
 
357
- # ---------------- INPUT AREA ----------------
358
- disabled_input = st.session_state.uploaded_file_name is None or client is None
359
 
360
- # Input Form
361
- with st.form(key='chat_form', clear_on_submit=True):
362
- col_input, col_btn = st.columns([0.85, 0.15], gap="small")
363
- with col_input:
364
- user_question = st.text_input(
365
- "Ask a question",
 
 
366
  placeholder="Ask a question about the loaded PDF...",
367
- label_visibility="collapsed",
368
- disabled=disabled_input
369
  )
370
- with col_btn:
371
- submit_btn = st.form_submit_button("➤", disabled=disabled_input, use_container_width=True)
372
 
373
- if submit_btn and user_question:
374
- st.session_state.chat.append(("user", user_question))
375
  with st.spinner("Thinking..."):
376
- answer, sources, error = ask_question(user_question)
377
- if answer:
378
- bot_msg = answer
379
- st.session_state.chat.append(("bot", bot_msg))
380
- else:
381
- st.session_state.chat.append(("bot", f"🔴 **Error:** {error}"))
382
  st.rerun()
383
 
384
- # ---------------- CHAT HISTORY (REVERSED) ----------------
385
- if st.session_state.chat:
386
- st.markdown("---")
387
- for role, msg in reversed(st.session_state.chat):
388
- if role == "user":
389
- st.markdown(f"<div class='chat-user'>{msg}</div>", unsafe_allow_html=True)
390
- else:
391
- st.markdown(f"<div class='chat-bot'>{msg}</div>", unsafe_allow_html=True)
 
3
  import gc
4
  import logging
5
  import streamlit as st
6
+ from groq import Groq
7
  from langchain_community.document_loaders import PyPDFLoader
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
10
  from langchain_community.vectorstores import Chroma
11
  import torch
12
 
13
+ # ---------------- CONFIG ----------------
14
  logging.basicConfig(level=logging.INFO)
15
 
 
16
# API key: Streamlit secrets take precedence, the process environment is the fallback.
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
GROQ_MODEL = "llama-3.1-8b-instant"

# ---------------- PAGE CONFIG ----------------
# Runs before any st.error / st.warning below: Streamlit documents
# set_page_config as the first Streamlit command of the page.
st.set_page_config(
    page_title="PDF Assistant",
    page_icon="📘",
    layout="wide"
)

# Groq client; stays None when the key is missing or construction fails,
# which the input section uses to disable the chat form.
client = None
if GROQ_API_KEY:
    try:
        client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        logging.exception("Failed to initialize Groq client")
        st.error(e)
else:
    # Without this the chat input is disabled with no visible reason.
    st.warning("⚠️ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
32
 
33
+ # ---------------- FORCE DARK UI (THEME INDEPENDENT) ----------------
34
  st.markdown("""
35
  <style>
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ /* ========= FORCE DARK THEME ========= */
38
+ :root {
39
+ --primary-color: #1e3a8a;
40
+ --background-color: #0e1117;
41
+ --secondary-background-color: #161b22;
42
+ --text-color: #f0f2f6;
43
  }
44
 
45
+ html, body, [data-testid="stAppViewContainer"], .main {
46
+ background-color: #0e1117 !important;
47
+ color: #f0f2f6 !important;
 
 
 
48
  }
49
 
50
+ /* Hide Streamlit chrome */
51
+ header[data-testid="stHeader"], footer {
 
 
 
52
  display: none;
53
  }
54
 
55
+ /* Sidebar */
56
  [data-testid="stSidebar"] {
57
+ background-color: #161b22 !important;
58
+ width: 20rem;
59
  position: fixed;
 
 
60
  height: 100vh;
61
+ overflow-y: auto;
 
 
 
 
 
 
 
 
 
62
  }
63
 
64
+ /* Fixed Header */
65
  .fixed-header {
66
  position: fixed;
67
  top: 0;
68
  left: 0;
69
  width: 100%;
70
  height: 6rem;
71
+ background: #0e1117;
72
+ z-index: 9999;
73
  display: flex;
74
  flex-direction: column;
 
75
  align-items: center;
76
+ justify-content: center;
77
+ border-bottom: 1px solid rgba(255,255,255,0.1);
78
  }
79
 
80
+ .title-text {
81
+ font-size: 2.5rem;
82
+ font-weight: 800;
 
 
 
 
 
 
 
 
 
 
 
83
  }
84
 
85
+ .creator-text {
86
+ font-size: 1rem;
87
+ color: #9ca3af;
 
 
 
 
 
 
 
 
 
 
 
88
  }
89
 
90
+ .creator-text a {
91
+ color: #4da6ff;
92
+ text-decoration: none;
 
 
 
93
  }
94
+
95
+ /* Main container */
96
+ .main .block-container {
97
+ margin-top: 6rem;
98
+ padding-bottom: 5rem;
99
  }
100
+
101
+ /* Inputs */
102
+ input, textarea {
103
+ background-color: #1f2933 !important;
104
+ color: #f0f2f6 !important;
105
+ border: 1px solid #374151 !important;
106
  }
107
+
108
+ /* Buttons */
109
+ button {
110
+ background-color: #1e3a8a !important;
111
  color: white !important;
112
+ border-radius: 8px !important;
113
+ font-weight: 600;
114
  }
115
+
116
+ button:hover {
117
+ background-color: #2563eb !important;
 
 
 
118
  }
119
 
120
+ /* Chat bubbles */
121
  .chat-user {
122
  background: #2d3748;
123
  padding: 12px;
 
125
  margin: 6px 0 6px auto;
126
  max-width: 85%;
127
  text-align: right;
 
128
  }
129
+
130
  .chat-bot {
131
  background: #1e3a8a;
132
  padding: 12px;
133
  border-radius: 10px 10px 10px 2px;
134
  margin: 6px auto 6px 0;
135
  max-width: 85%;
 
 
 
 
 
136
  }
137
 
138
+ /* Hide file list */
139
+ [data-testid="stFileUploaderFile"],
140
+ section[data-testid="stFileUploader"] ul,
141
+ section[data-testid="stFileUploader"] small {
142
+ display: none;
 
 
 
 
 
 
 
 
 
 
 
143
  }
144
 
145
+ /* Scrollbars */
146
+ *::-webkit-scrollbar {
147
+ width: 8px;
 
148
  }
149
+ *::-webkit-scrollbar-thumb {
150
+ background: #2d3748;
151
+ border-radius: 4px;
152
  }
153
+
154
  </style>
155
  """, unsafe_allow_html=True)
156
 
157
+ # ---------------- HEADER ----------------
158
  st.markdown("""
159
  <div class="fixed-header">
160
  <div class="title-text">📘 PDF Assistant</div>
 
167
  # ---------------- SESSION STATE ----------------
168
  if "chat" not in st.session_state:
169
  st.session_state.chat = []
170
+
171
  if "vectorstore" not in st.session_state:
172
  st.session_state.vectorstore = None
173
+
174
  if "retriever" not in st.session_state:
175
  st.session_state.retriever = None
176
+
177
  if "uploaded_file_name" not in st.session_state:
178
  st.session_state.uploaded_file_name = None
179
+
180
  if "uploader_key" not in st.session_state:
181
  st.session_state.uploader_key = 0
182
 
183
  # ---------------- FUNCTIONS ----------------
184
def clear_chat():
    """Drop the stored conversation by rebinding the session's chat list to a new empty list."""
    st.session_state["chat"] = []
186
 
187
  def clear_memory():
 
193
  if torch.cuda.is_available():
194
  torch.cuda.empty_cache()
195
 
196
def process_pdf(uploaded):
    """Index an uploaded PDF and publish its retriever in session state.

    The upload's bytes are written to a temporary ``.pdf`` file, loaded with
    PyPDFLoader, split into 800-character chunks (60 overlap), embedded on
    CPU with all-MiniLM-L6-v2 and stored in a Chroma vector store.

    Args:
        uploaded: Object exposing ``getvalue()`` that returns the PDF bytes
            (here, the value returned by ``st.file_uploader``).

    Returns:
        The number of chunks that were indexed.

    Raises:
        Any exception raised while loading, splitting or embedding; the
        temporary file is removed in that case as well.
    """
    path = None
    try:
        # delete=False so the file can be reopened by path after this handle closes.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            path = tmp.name
            tmp.write(uploaded.getvalue())

        docs = PyPDFLoader(path).load()

        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
        chunks = splitter.split_documents(docs)

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )

        vectorstore = Chroma.from_documents(chunks, embeddings)
        # The "vectorstore" session key is initialised alongside "retriever";
        # keep both in sync so code that resets one can reset the other.
        # NOTE(review): clear_memory is not fully visible here - confirm it
        # uses this key.
        st.session_state.vectorstore = vectorstore
        st.session_state.retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
        return len(chunks)
    finally:
        # Remove the temp file on failure too, not only on the success path.
        if path is not None and os.path.exists(path):
            os.unlink(path)
216
+
217
def ask(question):
    """Answer *question* from the indexed PDF via the Groq chat model.

    The session's retriever fetches matching chunks, their ``page_content``
    is joined into a context block, and a single user-role prompt is sent
    to ``GROQ_MODEL`` at temperature 0.1.

    Args:
        question: The user's question text.

    Returns:
        The model's reply with surrounding whitespace stripped. When the
        client or retriever is missing, or the retrieval / API call fails,
        a short explanatory message is returned instead, so the caller
        always receives a string it can append to the chat.
    """
    if client is None:
        return "Groq client is not initialized."
    retriever = st.session_state.retriever
    if retriever is None:
        return "Upload PDF first."

    try:
        docs = retriever.invoke(question)
        context = "\n\n".join(d.page_content for d in docs)

        prompt = f"""
You are a strict RAG assistant.
Use ONLY the PDF content.
If not found, say: I cannot find this in the PDF.

CONTEXT:
{context}

QUESTION:
{question}
"""

        res = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1
        )
        # Guard against a missing message body so .strip() cannot fail on None.
        return (res.choices[0].message.content or "").strip()
    except Exception as exc:
        # Report the failure in the chat instead of crashing the script run.
        logging.exception("Failed to answer question")
        return f"Error: {exc}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
  # ---------------- SIDEBAR ----------------
241
with st.sidebar:
    # Maintenance actions.
    if st.button("🗑️ Clear Chat History", use_container_width=True):
        clear_chat()
    if st.button("🔥 Clear PDF Memory", use_container_width=True):
        clear_memory()
        st.success("Memory cleared")

    st.markdown("---")

    # The widget key comes from session state (uploader_key).
    uploaded = st.file_uploader(
        "Upload PDF", type=["pdf"],
        key=st.session_state.uploader_key,
        label_visibility="collapsed"
    )

    # Only (re)index when the selected file's name differs from the active one.
    if uploaded and uploaded.name != st.session_state.uploaded_file_name:
        try:
            with st.spinner("Processing PDF..."):
                process_pdf(uploaded)
        except Exception as exc:
            # Show the failure and leave the previously active PDF state untouched.
            logging.exception("Failed to process uploaded PDF")
            st.error(f"Error processing PDF: {exc}")
        else:
            st.session_state.uploaded_file_name = uploaded.name
            st.session_state.chat = []
            st.success("PDF ready")
 
 
 
 
 
 
 
 
 
 
262
 
263
+ if not uploaded:
264
+ st.warning("⬆️ Upload a PDF to start chatting")
265
 
266
+ # ---------------- INPUT ----------------
267
# Chat is unavailable until a PDF has been indexed and the Groq client exists.
disabled = not st.session_state.uploaded_file_name or not client

with st.form("chat", clear_on_submit=True):
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        # A real label hidden via label_visibility; an empty label string
        # triggers a Streamlit accessibility warning.
        q = st.text_input(
            "Ask a question",
            placeholder="Ask a question about the loaded PDF...",
            label_visibility="collapsed",
            disabled=disabled
        )
    with col2:
        send = st.form_submit_button("➤", disabled=disabled)

# Ignore submissions that contain only whitespace.
question = q.strip() if q else ""
if send and question:
    st.session_state.chat.append(("user", question))
    with st.spinner("Thinking..."):
        try:
            ans = ask(question)
        except Exception as exc:
            # Keep the conversation consistent: every user turn gets a reply.
            logging.exception("Failed to answer question")
            ans = f"Error: {exc}"
    st.session_state.chat.append(("bot", ans))
    st.rerun()
286
 
287
+ # ---------------- CHAT ----------------
288
# Imported here so this section stays self-contained; the html module is
# only needed for escaping chat text.
import html

# Newest message first.
for role, msg in reversed(st.session_state.chat):
    bubble_class = "chat-user" if role == "user" else "chat-bot"
    # Questions and model answers are untrusted text: escape them before
    # inserting into raw HTML rendered with unsafe_allow_html=True.
    # Newlines become <br> so multi-line messages stay inside one bubble
    # (presumably a blank line would otherwise end the HTML block in the
    # markdown renderer - confirm).
    safe_msg = html.escape(str(msg)).replace("\n", "<br>")
    st.markdown(f"<div class='{bubble_class}'>{safe_msg}</div>", unsafe_allow_html=True)