dayannex committed on
Commit
5f1c9c8
·
1 Parent(s): 4756d93

corrección de etiquetas español/inglés

Browse files
Files changed (1) hide show
  1. app.py +35 -2
app.py CHANGED
@@ -261,12 +261,45 @@ class Model:
261
 
262
 
263
  if (self.idioma=='es'):
 
 
 
 
 
264
 
265
- new_tokens,ig_tokens=self.reordenacion_tokens_es(tokens,'Ġ')
 
 
 
 
 
 
 
 
 
 
 
266
 
267
 
268
  else:
269
- new_tokens,ig_tokens=self.reordenacion_tokens(tokens,'#')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
  new_identificadores = self.reordenacion_identificadores(ig_tokens,predicted_tokens_classes)
272
  out1 = self.salida_json(new_tokens,new_identificadores)
 
261
 
262
 
263
  if (self.idioma=='es'):
264
+ inputs = self.tokenizer(self.texto, return_tensors="pt",max_length=512, truncation=True)
265
+ with torch.no_grad():
266
+ outputs = self.model(**inputs)
267
+ logits = outputs.logits
268
+ predictions = torch.argmax(logits, dim=2)
269
 
270
+ predicted_token_class_ids = predictions[0].tolist()
271
+
272
+
273
+ predicted_tokens_classes = [self.model.config.id2label[label_id] for label_id in predicted_token_class_ids]
274
+ tokens = self.tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
275
+
276
+ predicted_tokens_classes.pop(0)
277
+ predicted_tokens_classes.pop(len(predicted_tokens_classes)-1)
278
+
279
+ tokens.pop(0)
280
+ tokens.pop(len(tokens)-1)
281
+ new_tokens,ig_tokens=self.reordenacion_tokens_es(tokens,'Ġ')
282
 
283
 
284
  else:
285
+ inputs = self.tokenizer(self.texto, return_tensors="pt")
286
+ with torch.no_grad():
287
+ outputs = self.model(**inputs)
288
+ logits = outputs.logits
289
+ predictions = torch.argmax(logits, dim=2)
290
+
291
+ predicted_token_class_ids = predictions[0].tolist()
292
+
293
+
294
+ predicted_tokens_classes = [self.model.config.id2label[label_id] for label_id in predicted_token_class_ids]
295
+ tokens = self.tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
296
+
297
+ predicted_tokens_classes.pop(0)
298
+ predicted_tokens_classes.pop(len(predicted_tokens_classes)-1)
299
+
300
+ tokens.pop(0)
301
+ tokens.pop(len(tokens)-1)
302
+ new_tokens,ig_tokens=self.reordenacion_tokens(tokens,'#')
303
 
304
  new_identificadores = self.reordenacion_identificadores(ig_tokens,predicted_tokens_classes)
305
  out1 = self.salida_json(new_tokens,new_identificadores)