Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -111,10 +111,6 @@ def load_css():
|
|
| 111 |
""", unsafe_allow_html=True)
|
| 112 |
|
| 113 |
# --- Helper Functions ---
|
| 114 |
-
def normalise_hyphens(text):
|
| 115 |
-
# Replace hyphen variants with U+002D for internal consistency
|
| 116 |
-
return text.replace('\u2011', '-').replace('\u2013', '-').replace('\u2014', '-')
|
| 117 |
-
|
| 118 |
def select_longest_segment(text):
|
| 119 |
# Split text by various dashes (hyphen, non-breaking hyphen, en dash, em dash)
|
| 120 |
dash_variants = ['\u002D', '\u2011', '\u2013', '\u2014']
|
|
@@ -135,12 +131,15 @@ def encode_text_fragment(text):
|
|
| 135 |
return urllib.parse.quote(text, safe='-')
|
| 136 |
|
| 137 |
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
|
| 138 |
-
#
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
selected_cited_text = select_longest_segment(cited_text)
|
| 142 |
-
|
| 143 |
-
data = f"{author}, {year} | {url} | {normalised_fragment_text} | {normalised_cited_text} | {username} | {normalise_hyphens(task_name)} | {current_date} | {current_time}"
|
| 144 |
return hashlib.sha256(data.encode('utf-8')).hexdigest()
|
| 145 |
|
| 146 |
def format_citation_html(url, fragment_text, author, year, scc_hash):
|
|
@@ -458,13 +457,20 @@ with tabs[1]:
|
|
| 458 |
elif citation_base_url != hash_base_url:
|
| 459 |
st.error("The citation URL and SCC index URL must point to the same base URL.")
|
| 460 |
else:
|
| 461 |
-
#
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
# Recompute hash
|
| 466 |
recomputed_hash = generate_citation_hash(
|
| 467 |
-
author, year, citation_base_url,
|
| 468 |
)
|
| 469 |
|
| 470 |
if recomputed_hash == scc_hash:
|
|
|
|
| 111 |
""", unsafe_allow_html=True)
|
| 112 |
|
| 113 |
# --- Helper Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
def select_longest_segment(text):
|
| 115 |
# Split text by various dashes (hyphen, non-breaking hyphen, en dash, em dash)
|
| 116 |
dash_variants = ['\u002D', '\u2011', '\u2013', '\u2014']
|
|
|
|
| 131 |
return urllib.parse.quote(text, safe='-')
|
| 132 |
|
| 133 |
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
|
| 134 |
+
# Normalize inputs by stripping whitespace
|
| 135 |
+
fragment_text = fragment_text.strip()
|
| 136 |
+
cited_text = cited_text.strip()
|
| 137 |
+
task_name = task_name.strip()
|
| 138 |
+
author = author.strip()
|
| 139 |
+
url = url.strip()
|
| 140 |
+
username = username.strip()
|
| 141 |
selected_cited_text = select_longest_segment(cited_text)
|
| 142 |
+
data = f"{author}, {year} | {url} | {fragment_text} | {selected_cited_text} | {username} | {task_name} | {current_date} | {current_time}"
|
|
|
|
| 143 |
return hashlib.sha256(data.encode('utf-8')).hexdigest()
|
| 144 |
|
| 145 |
def format_citation_html(url, fragment_text, author, year, scc_hash):
|
|
|
|
| 457 |
elif citation_base_url != hash_base_url:
|
| 458 |
st.error("The citation URL and SCC index URL must point to the same base URL.")
|
| 459 |
else:
|
| 460 |
+
# Normalize inputs by stripping whitespace
|
| 461 |
+
citation_fragment = citation_fragment.strip()
|
| 462 |
+
task_name = task_name.strip()
|
| 463 |
+
# Check for potential truncation
|
| 464 |
+
if len(citation_fragment) < 10:
|
| 465 |
+
st.markdown("""
|
| 466 |
+
<div class="warning-box">
|
| 467 |
+
<strong>Warning:</strong> The citation text fragment appears truncated, which may cause verification to fail.
|
| 468 |
+
</div>
|
| 469 |
+
""", unsafe_allow_html=True)
|
| 470 |
+
selected_citation_fragment = select_longest_segment(citation_fragment)
|
| 471 |
# Recompute hash
|
| 472 |
recomputed_hash = generate_citation_hash(
|
| 473 |
+
author, year, citation_base_url, citation_fragment, selected_citation_fragment, username, task_name, date, time
|
| 474 |
)
|
| 475 |
|
| 476 |
if recomputed_hash == scc_hash:
|