app modified
app.py
CHANGED
@@ -258,16 +258,16 @@ class ModeloDataset:
         self.tokenizer = AutoTokenizer.from_pretrained("BSC-LT/roberta_model_for_anonimization")
     def aplicar_modelo(self,_sentences,idioma):
         if idioma=="es":
-            tokenizer = AutoTokenizer.from_pretrained("BSC-LT/roberta_model_for_anonimization")
-            tokenized_text=[tokenizer.tokenize(sentence) for sentence in _sentences]
+            self.tokenizer = AutoTokenizer.from_pretrained("BSC-LT/roberta_model_for_anonimization")
+            tokenized_text=[self.tokenizer.tokenize(sentence) for sentence in _sentences]
 
-            ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]
+            ids = [self.tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]
             MAX_LEN=128
             ids=pad_sequences(ids,maxlen=MAX_LEN,dtype="long",truncating="post", padding="post")
             input_ids = torch.tensor(ids)
             #model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
 
-            model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
+            self.model = RobertaForTokenClassification.from_pretrained("BSC-LT/roberta_model_for_anonimization")
             with torch.no_grad():
                 logits = model(input_ids).logits
             predicted_token_class_ids = logits.argmax(-1)
@@ -283,16 +283,18 @@ class ModeloDataset:
         else:
 
             print('idioma:',idioma)
-            tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
-            tokenized_text=[tokenizer.tokenize(sentence) for sentence in _sentences]
+            self.tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
+            tokenized_text=[self.tokenizer.tokenize(sentence) for sentence in _sentences]
 
-            ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]
+            ids = [self.tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]
+            print('ids',ids)
+
             MAX_LEN=128
             ids=pad_sequences(ids,maxlen=MAX_LEN,dtype="long",truncating="post", padding="post")
             input_ids = torch.tensor(ids)
 
 
-            model = AutoModelForTokenClassification.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
+            self.model = AutoModelForTokenClassification.from_pretrained("FacebookAI/xlm-roberta-large-finetuned-conll03-english")
             with torch.no_grad():
                 logits = model(input_ids).logits
             predicted_token_class_ids = logits.argmax(-1)