Spaces:
Runtime error
Runtime error
Commit
·
fb737cc
1
Parent(s):
04fbf3e
Update app.py
Browse files
app.py
CHANGED
@@ -35,10 +35,12 @@ model = LlamaForCausalLM.from_pretrained(
|
|
35 |
device_map="auto") #low_cpu_mem_usage=True)
|
36 |
|
37 |
########## DEFINING FUNCTIONS ###################
|
|
|
38 |
def mean_pooling(model_output, attention_mask):
    """
    Average the token embeddings of each sequence, ignoring masked-out positions.

    The attention mask is broadcast to the shape of the token embeddings so that
    positions with mask value 0 contribute nothing to the sum, and the sum is
    divided by the number of unmasked positions.

    The whole computation stays in PyTorch: the mask is built with torch tensor
    methods (``unsqueeze``/``expand``/``float``), so the reduction and clamp use
    the matching torch functions instead of TensorFlow ops.

    :param model_output: indexable model output; element 0 is used as the token
        embeddings (assumed to be a torch tensor of (batch, tokens, dim) -- TODO confirm)
    :param attention_mask: torch tensor with one entry per token, non-zero for
        tokens to keep (assumed (batch, tokens) -- TODO confirm)
    :return: torch tensor with the token axis (dim 1) averaged away
    """
    token_embeddings = model_output[0]
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    # Clamp the divisor so a fully masked sequence yields zeros instead of NaN.
    token_counts = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    return summed_embeddings / token_counts
|
|
|
42 |
def broad_scope_class_predictor(class_embeddings, abstract_embedding, N=5, Sensitivity='Medium'):
|
43 |
predictions = pd.DataFrame(columns=['Class Name', 'Score'])
|
44 |
for i in range(len(class_embeddings)):
|
@@ -68,6 +70,7 @@ def broad_scope_class_predictor(class_embeddings, abstract_embedding, N=5, Sensi
|
|
68 |
HighestSimilarity = predictions.nlargest(N, ['Score'])
|
69 |
return HighestSimilarity
|
70 |
|
|
|
71 |
def sentence_embedder(sentences, model_path):
|
72 |
tokenizer = AutoTokenizer.from_pretrained(model_path) #instantiating the sentence embedder using HuggingFace library
|
73 |
model = AutoModel.from_pretrained(model_path, from_tf=True) #making a model instance
|
@@ -78,6 +81,7 @@ def sentence_embedder(sentences, model_path):
|
|
78 |
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) #outputs a (1, 384) tensor representation of input text
|
79 |
return sentence_embeddings
|
80 |
|
|
|
81 |
def add_text(history, text):
    """
    Append a new user turn to the chat history.

    :param history: list of (user message, bot reply) pairs; it is not modified
    :param text: the user's message
    :return: a tuple of (new history list ending with ``(text, None)``, empty string)
    """
    pending_turn = (text, None)  # reply slot stays None until it is filled in later
    extended_history = history + [pending_turn]
    cleared_text = ""
    return extended_history, cleared_text
|
@@ -86,6 +90,26 @@ def add_file(history, file):
|
|
86 |
history = history + [((file.name,), None)]
|
87 |
return history
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
def bot(history):
|
90 |
response = "**That's cool!**"
|
91 |
history[-1][1] = response
|
@@ -93,6 +117,7 @@ def bot(history):
|
|
93 |
|
94 |
|
95 |
########## LOADING PRE-COMPUTED EMBEDDINGS ##########
|
|
|
96 |
def clean_data(input, type='Dataframe'):
|
97 |
if type == 'Dataframe':
|
98 |
cleaneddf = pd.DataFrame(columns=['Class', 'Description'])
|
@@ -157,6 +182,7 @@ def clean_data(input, type='Dataframe'):
|
|
157 |
return row
|
158 |
|
159 |
|
|
|
160 |
def classifier(userin):
|
161 |
clean_in = clean_data(userin, type='String')
|
162 |
in_emb = sentence_embedder(clean_in, 'Model_bert')
|
@@ -166,6 +192,7 @@ def classifier(userin):
|
|
166 |
|
167 |
return broad_scope_predictions
|
168 |
|
|
|
169 |
def generateresponse(history):#, task):
|
170 |
"""
|
171 |
Model definition here:
|
@@ -213,6 +240,7 @@ def generateresponse(history):#, task):
|
|
213 |
return history
|
214 |
|
215 |
############# GRADIO APP ###############
|
|
|
216 |
theme = gr.themes.Base(
|
217 |
primary_hue="indigo",
|
218 |
).set(
|
|
|
35 |
device_map="auto") #low_cpu_mem_usage=True)
|
36 |
|
37 |
########## DEFINING FUNCTIONS ###################
|
38 |
+
|
39 |
def mean_pooling(model_output, attention_mask):
    """
    Average the token embeddings of each sequence, ignoring masked-out positions.

    The attention mask is broadcast to the shape of the token embeddings so that
    positions with mask value 0 contribute nothing to the sum, and the sum is
    divided by the number of unmasked positions.

    The whole computation stays in PyTorch: the mask is built with torch tensor
    methods (``unsqueeze``/``expand``/``float``), so the reduction and clamp use
    the matching torch functions instead of TensorFlow ops.

    :param model_output: indexable model output; element 0 is used as the token
        embeddings (assumed to be a torch tensor of (batch, tokens, dim) -- TODO confirm)
    :param attention_mask: torch tensor with one entry per token, non-zero for
        tokens to keep (assumed (batch, tokens) -- TODO confirm)
    :return: torch tensor with the token axis (dim 1) averaged away
    """
    token_embeddings = model_output[0]
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    # Clamp the divisor so a fully masked sequence yields zeros instead of NaN.
    token_counts = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    return summed_embeddings / token_counts
|
43 |
+
|
44 |
def broad_scope_class_predictor(class_embeddings, abstract_embedding, N=5, Sensitivity='Medium'):
|
45 |
predictions = pd.DataFrame(columns=['Class Name', 'Score'])
|
46 |
for i in range(len(class_embeddings)):
|
|
|
70 |
HighestSimilarity = predictions.nlargest(N, ['Score'])
|
71 |
return HighestSimilarity
|
72 |
|
73 |
+
|
74 |
def sentence_embedder(sentences, model_path):
|
75 |
tokenizer = AutoTokenizer.from_pretrained(model_path) #instantiating the sentence embedder using HuggingFace library
|
76 |
model = AutoModel.from_pretrained(model_path, from_tf=True) #making a model instance
|
|
|
81 |
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) #outputs a (1, 384) tensor representation of input text
|
82 |
return sentence_embeddings
|
83 |
|
84 |
+
|
85 |
def add_text(history, text):
    """
    Append a new user turn to the chat history.

    :param history: list of (user message, bot reply) pairs; it is not modified
    :param text: the user's message
    :return: a tuple of (new history list ending with ``(text, None)``, empty string)
    """
    pending_turn = (text, None)  # reply slot stays None until it is filled in later
    extended_history = history + [pending_turn]
    cleared_text = ""
    return extended_history, cleared_text
|
|
|
90 |
history = history + [((file.name,), None)]
|
91 |
return history
|
92 |
|
93 |
+
def convert_saved_embeddings(embedding_string):
    """
    Prepare a pre-computed embedding for comparison with new abstract embeddings.

    Pre-computed embeddings are saved as the string form of a tensor
    (e.g. ``"tensor([[0.1, 0.2, ...]])"``), so they need to be parsed back into
    numbers before cosine similarity can be calculated.

    Trailing ``key=value`` metadata that a tensor's string form may carry
    (``dtype=...``, ``device=...``, ``grad_fn=...``) is skipped rather than
    parsed as data. All numeric values are flattened into a single row.

    :param embedding_string: string form of a saved embedding tensor
    :return: torch tensor of shape (1, number of values) built from a float64 numpy array
    :raises ValueError: if a value cannot be parsed as a float, or the string
        contains no numeric values at all
    """
    # One pass removes brackets, parentheses and spaces; float() itself
    # tolerates any remaining surrounding whitespace such as newlines.
    stripped = embedding_string.replace('tensor', '').translate(
        str.maketrans('', '', '()[] ')
    )
    # Tokens containing '=' are repr metadata, never values.
    values = [float(token) for token in stripped.split(',') if '=' not in token]
    if not values:
        raise ValueError(
            'no numeric values found in embedding string: %r' % (embedding_string,)
        )
    embedding = np.expand_dims(np.array(values), axis=0)
    return torch.from_numpy(embedding)
|
112 |
+
|
113 |
def bot(history):
|
114 |
response = "**That's cool!**"
|
115 |
history[-1][1] = response
|
|
|
117 |
|
118 |
|
119 |
########## LOADING PRE-COMPUTED EMBEDDINGS ##########
|
120 |
+
|
121 |
def clean_data(input, type='Dataframe'):
|
122 |
if type == 'Dataframe':
|
123 |
cleaneddf = pd.DataFrame(columns=['Class', 'Description'])
|
|
|
182 |
return row
|
183 |
|
184 |
|
185 |
+
|
186 |
def classifier(userin):
|
187 |
clean_in = clean_data(userin, type='String')
|
188 |
in_emb = sentence_embedder(clean_in, 'Model_bert')
|
|
|
192 |
|
193 |
return broad_scope_predictions
|
194 |
|
195 |
+
|
196 |
def generateresponse(history):#, task):
|
197 |
"""
|
198 |
Model definition here:
|
|
|
240 |
return history
|
241 |
|
242 |
############# GRADIO APP ###############
|
243 |
+
|
244 |
theme = gr.themes.Base(
|
245 |
primary_hue="indigo",
|
246 |
).set(
|