Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -35,6 +35,80 @@ def answer_question(prompt):
|
|
35 |
generated_answer = hub_chain.run(input_data)
|
36 |
return generated_answer
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
text_list = []
|
39 |
|
40 |
def updateChoices(prompt):
|
|
|
35 |
generated_answer = hub_chain.run(input_data)
|
36 |
return generated_answer
|
37 |
|
38 |
+
def calculate_similarity(word, other_words, model, threshold=0.5, upper_threshold=0.85):
    """Find the first candidate that is similar, but not near-identical, to ``word``.

    ``word`` and every entry of ``other_words`` are embedded with
    ``model.encode`` and compared using cosine similarity, computed as
    ``1 - scipy.spatial.distance.cosine(a, b)``.

    Args:
        word: The word to compare against the candidates.
        other_words: Sequence of candidate words.
        model: Object exposing ``encode(list_of_str)`` that returns one
            vector per input string.
        threshold: Exclusive lower bound on the similarity of a match.
        upper_threshold: Exclusive upper bound on the similarity of a match;
            candidates at or above it are skipped. Defaults to 0.85, the
            value that used to be hard-coded in the comparison.

    Returns:
        ``(index, similarity)`` for the first candidate whose similarity lies
        strictly between the two bounds, where ``index`` is the position in
        ``other_words``; ``(None, None)`` when no candidate qualifies.
    """
    # Nothing to compare against: skip both encoder calls entirely.
    if len(other_words) == 0:
        return None, None

    word_embedding = model.encode([word])[0]
    candidate_embeddings = model.encode(other_words)
    for index, candidate_embedding in enumerate(candidate_embeddings):
        similarity = 1 - scipy.spatial.distance.cosine(word_embedding, candidate_embedding)
        if threshold < similarity < upper_threshold:
            return index, similarity
    return None, None
|
46 |
+
|
47 |
+
|
48 |
+
def highlight_words_within_cluster(sentences, model, exclude_words):
    """Wrap words that have a similar word in another sentence in a coloured ``<span>``.

    Each sentence is tokenized with ``word_tokenize``. A token is a candidate
    when it is alphanumeric and its lower-cased form is not in
    ``exclude_words``. Every candidate is passed to ``calculate_similarity``
    together with the candidate tokens of all *other* sentences; when that
    returns a match, the token is wrapped in
    ``<span style='color: ...'>`` using a colour that stays fixed per distinct
    token for the duration of the call.

    Args:
        sentences: Sequence of sentence strings. Sentences equal to the one
            being processed are left out of its comparison pool.
        model: Embedding model forwarded unchanged to ``calculate_similarity``.
        exclude_words: Container of lower-cased words that must never be
            highlighted or used as comparison candidates.

    Returns:
        A list with one string per input sentence: its tokens, highlighted
        where applicable, joined with single spaces.
    """
    html_color_codes = ["red", "green", "blue", "purple", "cyan", "fuchsia", "lime", "maroon", "olive", "navy", "teal", "gray"]
    # Maps each highlighted token to its colour so repeats reuse the same one.
    word_to_color = {}

    def is_candidate(token):
        # Only alphanumeric tokens that are not stop words take part.
        return token.lower() not in exclude_words and token.isalnum()

    highlighted_sentences = []
    for sentence in sentences:
        words = word_tokenize(sentence)
        all_other_words = [
            token
            for other in sentences
            if other != sentence
            for token in word_tokenize(other)
            if is_candidate(token)
        ]

        highlighted_words = []
        for word in words:
            match_index = None
            # With no comparison pool there can be no match, so the
            # similarity helper (and the encoder behind it) is not called.
            if all_other_words and is_candidate(word):
                match_index, _ = calculate_similarity(word, all_other_words, model)

            if match_index is None:
                highlighted_words.append(word)
                continue

            if word not in word_to_color:
                # Colours are handed out in palette order and wrap around once
                # there are more distinct highlighted tokens than colours.
                word_to_color[word] = html_color_codes[len(word_to_color) % len(html_color_codes)]
            highlighted_words.append(f"<span style='color: {word_to_color[word]}'>{word}</span>")

        highlighted_sentences.append(' '.join(highlighted_words))
    return highlighted_sentences
|
96 |
+
|
97 |
+
# Stop-word set, sentence-clustering helper, and embedding model setup follow.
|
98 |
+
|
99 |
+
# Stop-word set.
# NOTE(review): this name is assigned again later in the module (right after
# the SentenceTransformer model is created) with a different word list, so the
# value below is overwritten at import time — confirm which list is intended.
# NOTE(review): "I" is upper-case; code that tests `word.lower() not in
# exclude_words` (as highlight_words_within_cluster does) can never match it.
exclude_words = {"a", "the", "for", "from", "of", "in","over", "as", "on", "is", "am", "have", "an","has", "had", "and", "by", "it", "its", "those", "these", "was", "were", "their", "them", "I", "you", "also", "your", "me", "after"}
|
100 |
+
|
101 |
+
def cluster_sentences(sentences, model, num_clusters=3):
    """Group sentences into clusters with k-means over their embeddings.

    Args:
        sentences: Sequence of sentence strings to cluster.
        model: Object exposing ``encode(sentences)`` that returns one
            embedding per sentence.
        num_clusters: Requested number of clusters. It is capped at the
            number of embeddings so that short inputs do not make ``KMeans``
            reject the request.

    Returns:
        The fitted estimator's ``labels_``: one cluster label per sentence,
        in input order.
    """
    embeddings = model.encode(sentences)
    # KMeans raises ValueError when asked for more clusters than samples, so
    # never request more clusters than there are embeddings (and at least 1).
    n_clusters = max(1, min(num_clusters, len(embeddings)))
    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(embeddings)
    return kmeans.labels_
|
106 |
+
|
107 |
+
# Create the 'all-mpnet-base-v2' SentenceTransformer once, at import time.
# Presumably this is the object passed as the `model` argument of the helper
# functions in this module — confirm against the callers.
model = SentenceTransformer('all-mpnet-base-v2')
|
108 |
+
# NOTE(review): second assignment of exclude_words. It replaces the set
# defined earlier in the module: this list adds "above" and "to" but omits
# "was", "were", "their", "them", "I", "you", "also", "your", "me" and
# "after" — confirm which list is intended and drop the other.
exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}
|
109 |
+
|
110 |
+
|
111 |
+
|
112 |
text_list = []
|
113 |
|
114 |
def updateChoices(prompt):
|