Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -43,42 +43,77 @@ def answer_question(prompt):
|
|
43 |
return generated_answer
|
44 |
|
45 |
|
46 |
-
def calculate_similarity(word,
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
color_index = 0
|
69 |
-
for pair in all_matched_pairs:
|
70 |
-
color_code = html_color_codes[color_index % len(html_color_codes)]
|
71 |
-
# Correctly apply HTML span with style for coloring
|
72 |
-
words[pair[0]] = f"<span style='color: {color_code};'>{words[pair[0]]}</span>"
|
73 |
-
tokenized_other_sentence = word_tokenize(other_sentences[pair[1]])
|
74 |
-
tokenized_other_sentence = [f"<span style='color: {color_code};'>{word}</span>" if idx == pair[0] else word for idx, word in enumerate(tokenized_other_sentence)]
|
75 |
-
other_sentences[pair[1]] = ' '.join(tokenized_other_sentence)
|
76 |
-
color_index += 1
|
77 |
-
|
78 |
-
return ' '.join(words)
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
model = SentenceTransformer('all-mpnet-base-v2')
|
|
|
82 |
|
83 |
sentences = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.",
|
84 |
|
@@ -89,8 +124,6 @@ sentences = ["In a quaint little town nestled in the heart of the mountains, a s
|
|
89 |
"In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."]
|
90 |
|
91 |
|
92 |
-
|
93 |
-
|
94 |
text_list = []
|
95 |
|
96 |
def updateChoices(prompt):
|
@@ -101,29 +134,33 @@ def setTextVisibility(cbg, model_name_input):
|
|
101 |
sentences = []
|
102 |
result = []
|
103 |
model = SentenceTransformer('all-mpnet-base-v2')
|
104 |
-
exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"
|
105 |
sentences_org = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.",
|
106 |
"Within a picturesque mountain village, there stood a renowned bakery, celebrated for its handcrafted bread and sweet treats, attracting a long queue of patrons each morning, all keen to enjoy the baked delicacies that had gained widespread acclaim for their exceptional taste.",
|
107 |
"A charming bakery, located in a small mountainous hamlet, renowned for producing exquisite handmade pastries and bread, was bustling with a crowd of eager customers lined up outside, each anticipating the chance to indulge in the famous baked items celebrated for their extraordinary deliciousness.",
|
108 |
"In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."]
|
109 |
for text in cbg:
|
110 |
sentences.append(answer_question(text, model_name_input))
|
|
|
|
|
|
|
|
|
111 |
|
112 |
-
|
113 |
-
for
|
114 |
-
other_sentences = sentences[:i] + sentences[i+1:]
|
115 |
-
highlighted_sentence = highlight_words(sentence, other_sentences, model, exclude_words)
|
116 |
-
highlighted_sentences.append(highlighted_sentence)
|
117 |
-
|
118 |
-
for idx, sentence in enumerate(highlighted_sentences):
|
119 |
-
result.append("<p><strong>"+ cbg[idx] +"</strong></p><p>"+ sentence +"</p><br/>")
|
120 |
-
|
121 |
-
score = round(calculate_similarity_score(sentences))
|
122 |
|
123 |
-
|
|
|
|
|
|
|
124 |
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
-
return
|
127 |
|
128 |
def upload_file(files):
|
129 |
file_paths = [file.name for file in files]
|
|
|
43 |
return generated_answer
|
44 |
|
45 |
|
46 |
+
def calculate_similarity(word, other_words, model, threshold=0.5, max_similarity=0.85):
    """Return the first entry of ``other_words`` that is similar, but not too similar, to ``word``.

    Both ``word`` and ``other_words`` are embedded with ``model.encode`` and
    compared by cosine similarity (``1 - cosine distance``).  Candidates are
    scanned in order and the first one whose similarity lies strictly between
    ``threshold`` and ``max_similarity`` wins; it is not necessarily the best
    match.

    Args:
        word: The token to compare.
        other_words: Sequence of candidate tokens.
        model: Object exposing ``encode(list_of_str)`` that returns one vector
            per input string.
        threshold: Exclusive lower bound on the similarity.
        max_similarity: Exclusive upper bound on the similarity.  The default
            of 0.85 keeps the previously hard-coded cut-off, which filters out
            (near-)identical words.

    Returns:
        ``(index, similarity)`` for the first candidate inside the band, or
        ``(None, None)`` when there is no such candidate or ``other_words`` is
        empty.
    """
    # Nothing to compare against: skip both encode calls entirely.
    if len(other_words) == 0:
        return None, None

    word_embedding = model.encode([word])[0]
    candidate_embeddings = model.encode(other_words)
    for index, candidate_embedding in enumerate(candidate_embeddings):
        similarity = 1 - scipy.spatial.distance.cosine(word_embedding, candidate_embedding)
        if threshold < similarity < max_similarity:
            return index, similarity
    return None, None
|
54 |
+
|
55 |
+
|
56 |
+
def highlight_words_within_cluster(sentences, model, exclude_words):
    """Wrap words that have a similar counterpart in another sentence in a colored ``<span>``.

    Each sentence is tokenized with ``word_tokenize``.  A token is eligible
    when it is alphanumeric and its lowercase form is not in ``exclude_words``.
    An eligible token is highlighted when ``calculate_similarity`` reports a
    match among the eligible tokens of the other sentences (sentences whose
    text differs from the current one).

    Args:
        sentences: Sentences belonging to one cluster.
        model: Embedding model forwarded to ``calculate_similarity``.
        exclude_words: Lowercase words that are never highlighted or used as
            comparison candidates.

    Returns:
        A list with one string per input sentence: its tokens joined by single
        spaces, with highlighted tokens wrapped in
        ``<span style='color: ...'>``.  A given token text keeps the same
        color across all sentences of the call.
    """
    html_color_codes = ["red", "green", "blue", "purple", "cyan", "fuchsia", "lime", "maroon", "olive", "navy", "teal", "gray"]
    word_to_color = {}
    color_index = 0

    def is_candidate(token):
        return token.lower() not in exclude_words and token.isalnum()

    # Tokenize and filter every sentence once instead of redoing it for each
    # sentence of the outer loop.
    tokenized_sentences = [word_tokenize(sentence) for sentence in sentences]
    candidate_tokens = [
        [token for token in tokens if is_candidate(token)]
        for tokens in tokenized_sentences
    ]

    highlighted_sentences = []
    for sentence, words in zip(sentences, tokenized_sentences):
        # Comparison pool: eligible tokens of every sentence with different text.
        all_other_words = [
            token
            for other_sentence, tokens in zip(sentences, candidate_tokens)
            if other_sentence != sentence
            for token in tokens
        ]

        # No comparison words (e.g. a single-sentence cluster): nothing can
        # match, so skip the per-word similarity calls.
        if not all_other_words:
            highlighted_sentences.append(' '.join(words))
            continue

        highlighted_words = []
        for word in words:
            if not is_candidate(word):
                highlighted_words.append(word)
                continue

            match_index, _ = calculate_similarity(word, all_other_words, model)
            if match_index is None:
                highlighted_words.append(word)
                continue

            # Assign the next color in the cycle the first time a word matches.
            if word not in word_to_color:
                word_to_color[word] = html_color_codes[color_index % len(html_color_codes)]
                color_index += 1
            highlighted_words.append(f"<span style='color: {word_to_color[word]}'>{word}</span>")

        highlighted_sentences.append(' '.join(highlighted_words))
    return highlighted_sentences
|
104 |
+
|
105 |
+
# Module-level stop-word set and the sentence-clustering helper follow.
|
106 |
+
|
107 |
+
# Words that are never highlighted. Membership is tested against
# ``word.lower()``, so every entry must be lowercase ("i", not "I").
exclude_words = {"a", "the", "for", "from", "of", "in","over", "as", "on", "is", "am", "have", "an","has", "had", "and", "by", "it", "its", "those", "these", "was", "were", "their", "them", "i", "you", "also", "your", "me", "after"}
|
108 |
+
|
109 |
+
def cluster_sentences(sentences, model, num_clusters=1):
    """Assign each sentence to a K-Means cluster of its embedding.

    Args:
        sentences: Sentences to cluster.
        model: Object exposing ``encode(sentences)`` that returns one
            embedding per sentence.
        num_clusters: Requested number of clusters.  It is capped at
            ``len(sentences)`` because K-Means cannot fit more clusters than
            samples.

    Returns:
        An array with one integer cluster label per sentence, in input order.
        An empty array is returned for empty input.
    """
    if len(sentences) == 0:
        # Nothing to embed or fit; return an empty label array so callers can
        # still iterate or zip over the result.
        import numpy as np

        return np.empty(0, dtype=np.int32)

    embeddings = model.encode(sentences)
    # Labels stay below the requested num_clusters, so callers that size
    # their buckets by num_clusters keep working.
    effective_clusters = min(num_clusters, len(sentences))
    kmeans = KMeans(n_clusters=effective_clusters)
    kmeans.fit(embeddings)
    return kmeans.labels_
|
114 |
|
115 |
# Sentence-embedding model, instantiated at import time from the
# 'all-mpnet-base-v2' checkpoint name.
model = SentenceTransformer('all-mpnet-base-v2')
|
116 |
+
# NOTE(review): this rebinds the module-level `exclude_words` defined earlier
# in the file. Compared with that set it drops entries such as "was", "were"
# and "their" and adds "above" and "to" -- confirm which set is intended.
exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}
|
117 |
|
118 |
sentences = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.",
|
119 |
|
|
|
124 |
"In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."]
|
125 |
|
126 |
|
|
|
|
|
127 |
text_list = []
|
128 |
|
129 |
def updateChoices(prompt):
|
|
|
134 |
sentences = []
|
135 |
result = []
|
136 |
model = SentenceTransformer('all-mpnet-base-v2')
|
137 |
+
exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}
|
138 |
sentences_org = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.",
|
139 |
"Within a picturesque mountain village, there stood a renowned bakery, celebrated for its handcrafted bread and sweet treats, attracting a long queue of patrons each morning, all keen to enjoy the baked delicacies that had gained widespread acclaim for their exceptional taste.",
|
140 |
"A charming bakery, located in a small mountainous hamlet, renowned for producing exquisite handmade pastries and bread, was bustling with a crowd of eager customers lined up outside, each anticipating the chance to indulge in the famous baked items celebrated for their extraordinary deliciousness.",
|
141 |
"In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."]
|
142 |
for text in cbg:
|
143 |
sentences.append(answer_question(text, model_name_input))
|
144 |
+
|
145 |
+
# Step 1: Cluster the sentences
|
146 |
+
num_clusters = 1
|
147 |
+
sentence_clusters = cluster_sentences(sentences, model, num_clusters)
|
148 |
|
149 |
+
# Step 2: Highlight similar words within each cluster
|
150 |
+
clustered_sentences = [[] for _ in range(num_clusters)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
+
for sentence, cluster_id in zip(sentences, sentence_clusters):
|
153 |
+
clustered_sentences[cluster_id].append(sentence)
|
154 |
+
|
155 |
+
highlighted_clustered_sentences = []
|
156 |
|
157 |
+
for cluster in clustered_sentences:
|
158 |
+
highlighted_clustered_sentences.extend(highlight_words_within_cluster(cluster, model, exclude_words))
|
159 |
+
|
160 |
+
for idx, sentence in enumerate(highlighted_clustered_sentences):
|
161 |
+
result.append("<p><strong>"+ cbg[idx] +"</strong></p><p>"+ sentence +"</p><br/>")
|
162 |
|
163 |
+
return result
|
164 |
|
165 |
def upload_file(files):
|
166 |
file_paths = [file.name for file in files]
|