Spaces:

Building-science
/

SCC

Running

App Files

mabuseif committed on Aug 7

Commit

567ba93

verified ·

1 Parent(s): 15321a5

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -14

app.py CHANGED Viewed

@@ -111,10 +111,6 @@ def load_css():
     """, unsafe_allow_html=True)
 # --- Helper Functions ---
-def normalise_hyphens(text):
-    # Replace hyphen variants with U+002D for internal consistency
-    return text.replace('\u2011', '-').replace('\u2013', '-').replace('\u2014', '-')
 def select_longest_segment(text):
     # Split text by various dashes (hyphen, non-breaking hyphen, en dash, em dash)
     dash_variants = ['\u002D', '\u2011', '\u2013', '\u2014']
@@ -135,12 +131,15 @@ def encode_text_fragment(text):
     return urllib.parse.quote(text, safe='-')
 def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
-    # Normalise hyphens for consistent hash generation
-    normalised_fragment_text = normalise_hyphens(fragment_text)
-    # Use the longest segment for the cited text to align with citation link
     selected_cited_text = select_longest_segment(cited_text)
-    normalised_cited_text = normalise_hyphens(selected_cited_text)
-    data = f"{author}, {year} | {url} | {normalised_fragment_text} | {normalised_cited_text} | {username} | {normalise_hyphens(task_name)} | {current_date} | {current_time}"
     return hashlib.sha256(data.encode('utf-8')).hexdigest()
 def format_citation_html(url, fragment_text, author, year, scc_hash):
@@ -458,13 +457,20 @@ with tabs[1]:
             elif citation_base_url != hash_base_url:
                 st.error("The citation URL and SCC index URL must point to the same base URL.")
             else:
-                # Normalise hyphens and select longest segment for hash recomputation
-                normalised_citation_fragment = normalise_hyphens(citation_fragment)
-                selected_citation_fragment = select_longest_segment(normalised_citation_fragment)
-                normalised_task_name = normalise_hyphens(task_name)
                 # Recompute hash
                 recomputed_hash = generate_citation_hash(
-                    author, year, citation_base_url, normalised_citation_fragment, selected_citation_fragment, username, normalised_task_name, date, time
                 )
                 if recomputed_hash == scc_hash:

     """, unsafe_allow_html=True)
 # --- Helper Functions ---
 def select_longest_segment(text):
     # Split text by various dashes (hyphen, non-breaking hyphen, en dash, em dash)
     dash_variants = ['\u002D', '\u2011', '\u2013', '\u2014']
     return urllib.parse.quote(text, safe='-')
 def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
+    # Normalize inputs by stripping whitespace
+    fragment_text = fragment_text.strip()
+    cited_text = cited_text.strip()
+    task_name = task_name.strip()
+    author = author.strip()
+    url = url.strip()
+    username = username.strip()
     selected_cited_text = select_longest_segment(cited_text)
+    data = f"{author}, {year} | {url} | {fragment_text} | {selected_cited_text} | {username} | {task_name} | {current_date} | {current_time}"
     return hashlib.sha256(data.encode('utf-8')).hexdigest()
 def format_citation_html(url, fragment_text, author, year, scc_hash):
             elif citation_base_url != hash_base_url:
                 st.error("The citation URL and SCC index URL must point to the same base URL.")
             else:
+                # Normalize inputs by stripping whitespace
+                citation_fragment = citation_fragment.strip()
+                task_name = task_name.strip()
+                # Check for potential truncation
+                if len(citation_fragment) < 10:
+                    st.markdown("""
+                    <div class="warning-box">
+                        <strong>Warning:</strong> The citation text fragment appears truncated, which may cause verification to fail.
+                    </div>
+                    """, unsafe_allow_html=True)
+                selected_citation_fragment = select_longest_segment(citation_fragment)
                 # Recompute hash
                 recomputed_hash = generate_citation_hash(
+                    author, year, citation_base_url, citation_fragment, selected_citation_fragment, username, task_name, date, time
                 )
                 if recomputed_hash == scc_hash: