mabuseif committed on
Commit
567ba93
·
verified ·
1 Parent(s): 15321a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -14
app.py CHANGED
@@ -111,10 +111,6 @@ def load_css():
111
  """, unsafe_allow_html=True)
112
 
113
  # --- Helper Functions ---
114
- def normalise_hyphens(text):
115
- # Replace hyphen variants with U+002D for internal consistency
116
- return text.replace('\u2011', '-').replace('\u2013', '-').replace('\u2014', '-')
117
-
118
  def select_longest_segment(text):
119
  # Split text by various dashes (hyphen, non-breaking hyphen, en dash, em dash)
120
  dash_variants = ['\u002D', '\u2011', '\u2013', '\u2014']
@@ -135,12 +131,15 @@ def encode_text_fragment(text):
135
  return urllib.parse.quote(text, safe='-')
136
 
137
  def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
138
- # Normalise hyphens for consistent hash generation
139
- normalised_fragment_text = normalise_hyphens(fragment_text)
140
- # Use the longest segment for the cited text to align with citation link
 
 
 
 
141
  selected_cited_text = select_longest_segment(cited_text)
142
- normalised_cited_text = normalise_hyphens(selected_cited_text)
143
- data = f"{author}, {year} | {url} | {normalised_fragment_text} | {normalised_cited_text} | {username} | {normalise_hyphens(task_name)} | {current_date} | {current_time}"
144
  return hashlib.sha256(data.encode('utf-8')).hexdigest()
145
 
146
  def format_citation_html(url, fragment_text, author, year, scc_hash):
@@ -458,13 +457,20 @@ with tabs[1]:
458
  elif citation_base_url != hash_base_url:
459
  st.error("The citation URL and SCC index URL must point to the same base URL.")
460
  else:
461
- # Normalise hyphens and select longest segment for hash recomputation
462
- normalised_citation_fragment = normalise_hyphens(citation_fragment)
463
- selected_citation_fragment = select_longest_segment(normalised_citation_fragment)
464
- normalised_task_name = normalise_hyphens(task_name)
 
 
 
 
 
 
 
465
  # Recompute hash
466
  recomputed_hash = generate_citation_hash(
467
- author, year, citation_base_url, normalised_citation_fragment, selected_citation_fragment, username, normalised_task_name, date, time
468
  )
469
 
470
  if recomputed_hash == scc_hash:
 
111
  """, unsafe_allow_html=True)
112
 
113
  # --- Helper Functions ---
 
 
 
 
114
  def select_longest_segment(text):
115
  # Split text by various dashes (hyphen, non-breaking hyphen, en dash, em dash)
116
  dash_variants = ['\u002D', '\u2011', '\u2013', '\u2014']
 
131
  return urllib.parse.quote(text, safe='-')
132
 
133
  def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
134
+ # Normalize inputs by stripping whitespace
135
+ fragment_text = fragment_text.strip()
136
+ cited_text = cited_text.strip()
137
+ task_name = task_name.strip()
138
+ author = author.strip()
139
+ url = url.strip()
140
+ username = username.strip()
141
  selected_cited_text = select_longest_segment(cited_text)
142
+ data = f"{author}, {year} | {url} | {fragment_text} | {selected_cited_text} | {username} | {task_name} | {current_date} | {current_time}"
 
143
  return hashlib.sha256(data.encode('utf-8')).hexdigest()
144
 
145
  def format_citation_html(url, fragment_text, author, year, scc_hash):
 
457
  elif citation_base_url != hash_base_url:
458
  st.error("The citation URL and SCC index URL must point to the same base URL.")
459
  else:
460
+ # Normalize inputs by stripping whitespace
461
+ citation_fragment = citation_fragment.strip()
462
+ task_name = task_name.strip()
463
+ # Check for potential truncation
464
+ if len(citation_fragment) < 10:
465
+ st.markdown("""
466
+ <div class="warning-box">
467
+ <strong>Warning:</strong> The citation text fragment appears truncated, which may cause verification to fail.
468
+ </div>
469
+ """, unsafe_allow_html=True)
470
+ selected_citation_fragment = select_longest_segment(citation_fragment)
471
  # Recompute hash
472
  recomputed_hash = generate_citation_hash(
473
+ author, year, citation_base_url, citation_fragment, selected_citation_fragment, username, task_name, date, time
474
  )
475
 
476
  if recomputed_hash == scc_hash: