dayannex committed
Commit dc19c13 · 1 Parent(s): ef0edc1

app modified

Files changed (1)
  1. app.py +10 -8
app.py CHANGED
@@ -258,16 +258,16 @@ class ModeloDataset:
         self.tokenizer = AutoTokenizer.from_pretrained("BSC-LT/roberta_model_for_anonimization")
     def aplicar_modelo(self,_sentences,idioma):
         if idioma=="es":
-            tokenizer = AutoTokenizer.from_pretrained("BSC-LT/roberta_model_for_anonimization")
-            tokenized_text=[tokenizer.tokenize(sentence) for sentence in _sentences]
+            self.tokenizer = AutoTokenizer.from_pretrained("BSC-LT/roberta_model_for_anonimization")
+            tokenized_text=[self.tokenizer.tokenize(sentence) for sentence in _sentences]
 
-            ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]
+            ids = [self.tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]
             MAX_LEN=128
             ids=pad_sequences(ids,maxlen=MAX_LEN,dtype="long",truncating="post", padding="post")
             input_ids = torch.tensor(ids)
             #model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
 
-            model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
+            self.model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
             with torch.no_grad():
                 logits = model(input_ids).logits
                 predicted_token_class_ids = logits.argmax(-1)
@@ -283,16 +283,18 @@ class ModeloDataset:
         else:
 
             print('idioma:',idioma)
-            tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
-            tokenized_text=[tokenizer.tokenize(sentence) for sentence in _sentences]
+            self.tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
+            tokenized_text=[self.tokenizer.tokenize(sentence) for sentence in _sentences]
 
-            ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]
+            ids = [self.tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]
+            print('ids',ids)
+
             MAX_LEN=128
             ids=pad_sequences(ids,maxlen=MAX_LEN,dtype="long",truncating="post", padding="post")
             input_ids = torch.tensor(ids)
 
 
-            model = AutoModelForTokenClassification.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
+            self.model = AutoModelForTokenClassification.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
             with torch.no_grad():
                 logits = model(input_ids).logits
                 predicted_token_class_ids = logits.argmax(-1)
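
Net effect of the commit: aplicar_modelo now stores the tokenizer and model on the instance (self.tokenizer, self.model) instead of in locals, and the non-Spanish branch additionally logs the converted ids. Below is a minimal sketch of how the Spanish ("es") branch reads after this change. It is not the full app.py: it assumes the later model(input_ids) call, left as an unchanged context line in the diff above, is also switched to self.model, and it assumes pad_sequences comes from Keras, which app.py's existing imports are expected to provide.

# Minimal sketch (not the full app.py): the "es" branch of ModeloDataset.aplicar_modelo
# after this commit, with the forward pass assumed to use self.model.
import torch
from transformers import AutoTokenizer, RobertaForTokenClassification
from keras.preprocessing.sequence import pad_sequences  # assumed import path for pad_sequences

class ModeloDataset:
    def aplicar_modelo(self, _sentences, idioma):
        if idioma == "es":
            # Tokenizer and model are now kept on the instance rather than in locals
            self.tokenizer = AutoTokenizer.from_pretrained("BSC-LT/roberta_model_for_anonimization")
            tokenized_text = [self.tokenizer.tokenize(sentence) for sentence in _sentences]
            ids = [self.tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]

            # Pad/truncate every sentence to a fixed length of 128 token ids
            MAX_LEN = 128
            ids = pad_sequences(ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
            input_ids = torch.tensor(ids)

            self.model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
            with torch.no_grad():
                logits = self.model(input_ids).logits          # (batch, seq_len, num_labels)
                predicted_token_class_ids = logits.argmax(-1)  # best label id per token
            # ... the method continues past this hunk in app.py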