Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -111,10 +111,6 @@ def load_css():
|
|
111 |
""", unsafe_allow_html=True)
|
112 |
|
113 |
# --- Helper Functions ---
|
114 |
-
def normalise_hyphens(text):
|
115 |
-
# Replace hyphen variants with U+002D for internal consistency
|
116 |
-
return text.replace('\u2011', '-').replace('\u2013', '-').replace('\u2014', '-')
|
117 |
-
|
118 |
def select_longest_segment(text):
|
119 |
# Split text by various dashes (hyphen, non-breaking hyphen, en dash, em dash)
|
120 |
dash_variants = ['\u002D', '\u2011', '\u2013', '\u2014']
|
@@ -135,12 +131,15 @@ def encode_text_fragment(text):
|
|
135 |
return urllib.parse.quote(text, safe='-')
|
136 |
|
137 |
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
|
138 |
-
#
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
141 |
selected_cited_text = select_longest_segment(cited_text)
|
142 |
-
|
143 |
-
data = f"{author}, {year} | {url} | {normalised_fragment_text} | {normalised_cited_text} | {username} | {normalise_hyphens(task_name)} | {current_date} | {current_time}"
|
144 |
return hashlib.sha256(data.encode('utf-8')).hexdigest()
|
145 |
|
146 |
def format_citation_html(url, fragment_text, author, year, scc_hash):
|
@@ -458,13 +457,20 @@ with tabs[1]:
|
|
458 |
elif citation_base_url != hash_base_url:
|
459 |
st.error("The citation URL and SCC index URL must point to the same base URL.")
|
460 |
else:
|
461 |
-
#
|
462 |
-
|
463 |
-
|
464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
# Recompute hash
|
466 |
recomputed_hash = generate_citation_hash(
|
467 |
-
author, year, citation_base_url,
|
468 |
)
|
469 |
|
470 |
if recomputed_hash == scc_hash:
|
|
|
111 |
""", unsafe_allow_html=True)
|
112 |
|
113 |
# --- Helper Functions ---
|
|
|
|
|
|
|
|
|
114 |
def select_longest_segment(text):
|
115 |
# Split text by various dashes (hyphen, non-breaking hyphen, en dash, em dash)
|
116 |
dash_variants = ['\u002D', '\u2011', '\u2013', '\u2014']
|
|
|
131 |
return urllib.parse.quote(text, safe='-')
|
132 |
|
133 |
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
|
134 |
+
# Normalize inputs by stripping whitespace
|
135 |
+
fragment_text = fragment_text.strip()
|
136 |
+
cited_text = cited_text.strip()
|
137 |
+
task_name = task_name.strip()
|
138 |
+
author = author.strip()
|
139 |
+
url = url.strip()
|
140 |
+
username = username.strip()
|
141 |
selected_cited_text = select_longest_segment(cited_text)
|
142 |
+
data = f"{author}, {year} | {url} | {fragment_text} | {selected_cited_text} | {username} | {task_name} | {current_date} | {current_time}"
|
|
|
143 |
return hashlib.sha256(data.encode('utf-8')).hexdigest()
|
144 |
|
145 |
def format_citation_html(url, fragment_text, author, year, scc_hash):
|
|
|
457 |
elif citation_base_url != hash_base_url:
|
458 |
st.error("The citation URL and SCC index URL must point to the same base URL.")
|
459 |
else:
|
460 |
+
# Normalize inputs by stripping whitespace
|
461 |
+
citation_fragment = citation_fragment.strip()
|
462 |
+
task_name = task_name.strip()
|
463 |
+
# Check for potential truncation
|
464 |
+
if len(citation_fragment) < 10:
|
465 |
+
st.markdown("""
|
466 |
+
<div class="warning-box">
|
467 |
+
<strong>Warning:</strong> The citation text fragment appears truncated, which may cause verification to fail.
|
468 |
+
</div>
|
469 |
+
""", unsafe_allow_html=True)
|
470 |
+
selected_citation_fragment = select_longest_segment(citation_fragment)
|
471 |
# Recompute hash
|
472 |
recomputed_hash = generate_citation_hash(
|
473 |
+
author, year, citation_base_url, citation_fragment, selected_citation_fragment, username, task_name, date, time
|
474 |
)
|
475 |
|
476 |
if recomputed_hash == scc_hash:
|