Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -115,15 +115,6 @@ def normalise_hyphens(text):
|
|
115 |
# Replace hyphen variants with U+002D for internal consistency
|
116 |
return text.replace('\u2011', '-').replace('\u2013', '-').replace('\u2014', '-')
|
117 |
|
118 |
-
def select_longest_segment(text):
    """Return the longest dash-delimited segment of ``text``, stripped.

    The text is split on every dash variant handled here: hyphen-minus
    (U+002D), non-breaking hyphen (U+2011), en dash (U+2013) and em dash
    (U+2014). The longest piece is chosen by its raw length (before
    stripping); on a tie the earliest piece wins.

    Args:
        text: The string to split.

    Returns:
        The longest segment with surrounding whitespace removed. When
        ``text`` contains no dash this is simply ``text.strip()``.
    """
    # Function-scope import keeps this block self-contained.
    import re

    # Split on all dash variants in a single pass. The earlier loop rebound
    # the name to the list returned by the first str.split() and then called
    # .split() on that list, which raised AttributeError for every input.
    # The hyphen-minus is placed first in the class so it is taken literally.
    segments = re.split('[-\u2011\u2013\u2014]', text)

    # re.split always yields at least one item; the default is a safeguard.
    return max(segments, key=len, default=text).strip()
|
126 |
-
|
127 |
def encode_text_fragment(text):
    """Percent-encode ``text`` for use in a W3C Text Fragment URL.

    Only the regular hyphen (U+002D) is passed as an extra safe character.
    Other dash variants are percent-encoded from their UTF-8 bytes, e.g.
    non-breaking hyphen (U+2011) -> %E2%80%91 and em dash (U+2014) ->
    %E2%80%94.
    """
    # safe='-' replaces quote()'s default safe set of '/', so slashes are
    # percent-encoded as well.
    encoded = urllib.parse.quote(text, safe='-')
    return encoded
|
133 |
|
134 |
-
def
|
135 |
-
#
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
|
|
140 |
|
141 |
def format_citation_html(url, fragment_text, author, year, scc_hash):
|
142 |
# Use original fragment_text for text fragment URL to match external source
|
|
|
115 |
# Replace hyphen variants with U+002D for internal consistency
|
116 |
return text.replace('\u2011', '-').replace('\u2013', '-').replace('\u2014', '-')
|
117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
def encode_text_fragment(text):
    """Percent-encode ``text`` for use in a W3C Text Fragment URL.

    Only the regular hyphen (U+002D) is passed as an extra safe character.
    Other dash variants are percent-encoded from their UTF-8 bytes, e.g.
    non-breaking hyphen (U+2011) -> %E2%80%91 and em dash (U+2014) ->
    %E2%80%94.
    """
    # safe='-' replaces quote()'s default safe set of '/', so slashes are
    # percent-encoded as well.
    encoded = urllib.parse.quote(text, safe='-')
    return encoded
|
124 |
|
125 |
+
def generate_citation_hash(author, year, url, fragment_text, cited_text, username, task_name, current_date, current_time):
    """Return the SHA-256 hex digest identifying one citation record.

    ``fragment_text``, ``cited_text`` and ``task_name`` are passed through
    ``normalise_hyphens`` first so that dash variants do not alter the hash.
    The fields are then joined as
    ``"author, year | url | fragment | cited | username | task | date | time"``
    and the UTF-8 encoding of that string is hashed.
    """
    # Normalise the three free-text fields, in the same order as they are hashed.
    fragment_norm, cited_norm, task_norm = map(
        normalise_hyphens, (fragment_text, cited_text, task_name)
    )

    fields = (
        f"{author}, {year}",
        url,
        fragment_norm,
        cited_norm,
        username,
        task_norm,
        current_date,
        current_time,
    )
    # format(x) applies the same default formatting as an f-string placeholder.
    payload = " | ".join(map(format, fields))

    digest = hashlib.sha256()
    digest.update(payload.encode('utf-8'))
    return digest.hexdigest()
|
132 |
|
133 |
def format_citation_html(url, fragment_text, author, year, scc_hash):
|
134 |
# Use original fragment_text for text fragment URL to match external source
|