mabuseif committed on
Commit
04a3d2b
·
verified ·
1 Parent(s): e7dfed4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -4
app.py CHANGED
@@ -17,6 +17,10 @@ import pytz
17
  import re
18
  import pandas as pd
19
  import base64
 
 
 
 
20
 
21
  # --- Constants ---
22
  MELBOURNE_TIMEZONE = 'Australia/Melbourne'
@@ -213,6 +217,50 @@ def get_table_download_link(df, filename="citation_data.csv"):
213
  href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">Download Citation Data as CSV</a>'
214
  return href
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  # --- Live Clock JavaScript ---
217
  def live_clock():
218
  return """
@@ -413,27 +461,62 @@ with tabs[0]:
413
  citation_link_end = f'<a href="{source_url}#:~:text={encode_text_fragment(selected_fragment)}" data-hash="{scc_hash}">({author_name}, {publication_year})</a>'
414
  metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
415
 
416
- col_html1, col_html2 = st.columns(2)
417
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
  # HTML Citation - Start of Text
419
  with col_html1:
420
  st.markdown("### Citation (Start of Text)")
421
  st.markdown('<div class="rendered-citation">', unsafe_allow_html=True)
422
  st.markdown(citation_link_start, unsafe_allow_html=True)
423
  st.markdown('</div>', unsafe_allow_html=True)
424
-
425
  # HTML Citation - End of Text
426
  with col_html2:
427
  st.markdown("### Citation (End of Text)")
428
  st.markdown('<div class="rendered-citation">', unsafe_allow_html=True)
429
  st.markdown(citation_link_end, unsafe_allow_html=True)
430
  st.markdown('</div>', unsafe_allow_html=True)
431
-
432
  # SCC Index
433
  st.markdown("### SCC Index")
434
  st.markdown(metadata_link, unsafe_allow_html=True)
435
 
436
- st.markdown('</div>', unsafe_allow_html=True)
 
 
 
 
 
437
 
438
  with tabs[1]:
439
  st.markdown('<div class="tab-content">', unsafe_allow_html=True)
 
17
  import re
18
  import pandas as pd
19
  import base64
20
+ import io
21
+ import openpyxl
22
+ from openpyxl.utils.dataframe import dataframe_to_rows
23
+ from openpyxl.worksheet.hyperlink import Hyperlink
24
 
25
  # --- Constants ---
26
  MELBOURNE_TIMEZONE = 'Australia/Melbourne'
 
217
  href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">Download Citation Data as CSV</a>'
218
  return href
219
 
220
def get_excel_download_link(df, filename="citation_data.xlsx"):
    """Return an HTML ``<a>`` tag that downloads ``df`` as an Excel workbook.

    The workbook is built in memory with openpyxl and embedded in the link as
    a base64 ``data:`` URI. The first worksheet row holds the DataFrame's
    column names; each DataFrame row follows in order.

    Values in the "Citation" and "SCC Index" columns are searched for an HTML
    anchor of the form ``<a href="URL" ...>TEXT</a>``. When one is found, the
    cell receives TEXT as its value and URL as a clickable hyperlink styled
    with the built-in "Hyperlink" style. Values without such an anchor, and
    all other columns, are written unchanged.

    Args:
        df: DataFrame to export.
        filename: Name placed in the link's ``download`` attribute.

    Returns:
        The HTML anchor string for the download link.
    """
    # Compiled once up front rather than re-parsed for every link cell.
    anchor_pattern = re.compile(r'<a href="([^"]+)"[^>]*>([^<]+)</a>')
    link_columns = ("Citation", "SCC Index")

    wb = openpyxl.Workbook()
    ws = wb.active

    # Row 1 of the sheet is the header row.
    headers = df.columns.tolist()
    ws.append(headers)

    # Data rows start at sheet row 2; counting them explicitly gives the
    # coordinates needed for hyperlinks without querying the sheet size.
    for row_number, (_, row) in enumerate(df.iterrows(), start=2):
        row_data = []
        links = []  # (1-based column number, URL) for cells needing a hyperlink

        for col_number, col in enumerate(headers, start=1):
            value = row[col]
            match = anchor_pattern.search(str(value)) if col in link_columns else None
            if match:
                link_url, display_text = match.groups()
                row_data.append(display_text)
                links.append((col_number, link_url))
            else:
                row_data.append(value)

        ws.append(row_data)

        # Hyperlinks can only be attached once the cells exist in the sheet.
        for col_number, link_url in links:
            cell = ws.cell(row=row_number, column=col_number)
            cell.hyperlink = link_url
            cell.hyperlink.tooltip = "Click to visit source"
            cell.style = "Hyperlink"

    output = io.BytesIO()
    wb.save(output)
    b64 = base64.b64encode(output.getvalue()).decode()
    href = f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">Download citation data as Excel</a>'
    return href
263
+
264
  # --- Live Clock JavaScript ---
265
  def live_clock():
266
  return """
 
461
  citation_link_end = f'<a href="{source_url}#:~:text={encode_text_fragment(selected_fragment)}" data-hash="{scc_hash}">({author_name}, {publication_year})</a>'
462
  metadata_link = format_metadata_html(source_url, author_name, publication_year, scc_hash, username, task_name, current_date, current_time)
463
 
464
+ # --- Persistent Table with Clickable SCC Hash ---
465
 
466
+ # First, ensure session state is initialized for the citation DataFrame
467
+ if 'citation_df' not in st.session_state:
468
+ st.session_state.citation_df = pd.DataFrame(columns=[
469
+ "Username", "Task Name", "Time", "Date",
470
+ "Citation", "SCC Index", "Annotated Text"
471
+ ])
472
+
473
+ # Create clickable HTML for SCC Index (full metadata link)
474
+ clickable_index = metadata_link
475
+
476
+ # Create new row data
477
+ new_row = {
478
+ "Username": username,
479
+ "Task Name": task_name,
480
+ "Time": current_time,
481
+ "Date": current_date,
482
+ "Citation": citation_link_start,
483
+ "SCC Index": clickable_index,
484
+ "Annotated Text": annotated_text
485
+ }
486
+
487
+ # Append the new row to the session state DataFrame
488
+ new_df = pd.DataFrame([new_row])
489
+ st.session_state.citation_df = pd.concat([st.session_state.citation_df, new_df], ignore_index=True)
490
+
491
+ # Get the accumulated DataFrame for display and download
492
+ df = st.session_state.citation_df
493
+
494
+ col_html1, col_html2 = st.columns(2)
495
+
496
  # HTML Citation - Start of Text
497
  with col_html1:
498
  st.markdown("### Citation (Start of Text)")
499
  st.markdown('<div class="rendered-citation">', unsafe_allow_html=True)
500
  st.markdown(citation_link_start, unsafe_allow_html=True)
501
  st.markdown('</div>', unsafe_allow_html=True)
502
+
503
  # HTML Citation - End of Text
504
  with col_html2:
505
  st.markdown("### Citation (End of Text)")
506
  st.markdown('<div class="rendered-citation">', unsafe_allow_html=True)
507
  st.markdown(citation_link_end, unsafe_allow_html=True)
508
  st.markdown('</div>', unsafe_allow_html=True)
509
+
510
  # SCC Index
511
  st.markdown("### SCC Index")
512
  st.markdown(metadata_link, unsafe_allow_html=True)
513
 
514
+ # Display table after SCC Index
515
+ st.markdown("### Citation Table")
516
+ st.markdown(get_excel_download_link(df), unsafe_allow_html=True)
517
+ st.markdown(df.to_html(classes="citation-table", index=False, escape=False), unsafe_allow_html=True)
518
+
519
+ st.markdown('</div>', unsafe_allow_html=True)
520
 
521
  with tabs[1]:
522
  st.markdown('<div class="tab-content">', unsafe_allow_html=True)