SatAT committed
Commit 3c6b6f2 · 1 Parent(s): 32b477a

Update app.py
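
Replaces the padded-batch prediction pipeline (pad_sequences, hand-built attention masks, TensorDataset + SequentialSampler + DataLoader) with a direct single-text forward pass: the input is tokenized, converted to an id tensor, run through the model under torch.no_grad(), and the argmax class index is decoded back to a label via LabelEncoder.inverse_transform.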

Files changed (1): app.py (+12, -49)
app.py CHANGED
@@ -44,25 +44,13 @@ encoded_sent = tokenizer.encode(
     #max_length = 128,       # Truncate all sentences.
     #return_tensors = 'pt',  # Return pytorch tensors.
 )
-# Add the encoded sentence to the list.
-test_input_ids.append(encoded_sent)
-test_input_ids = pad_sequences(test_input_ids, maxlen=MAX_LEN,
-                               dtype="long", truncating="post", padding="post")
-# Create attention masks
-attention_masks = []
-
-# Create a mask of 1s for each token followed by 0s for padding
-for seq in test_input_ids:
-    seq_mask = [float(i > 0) for i in seq]
-    attention_masks.append(seq_mask)
-
-# Convert to tensors.
-prediction_inputs = torch.tensor(test_input_ids)
-prediction_masks = torch.tensor(attention_masks)
-prediction_data = TensorDataset(prediction_inputs, prediction_masks, [])
-prediction_sampler = SequentialSampler(prediction_data)
-prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=1)
-# Put model in evaluation mode
+#tkns = tokenized_sub_sentence
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(str(text)))  # le.convert_tokens_to_ids(tkns)
+segments_ids = [0] * len(indexed_tokens)
+
+tokens_tensor = torch.tensor([indexed_tokens]).to(device)
+segments_tensors = torch.tensor([segments_ids]).to(device)
+
 model = BertForSequenceClassification.from_pretrained(
     "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
     num_labels = 44,      # The number of output labels: 44 classes here (2 would mean binary classification).
@@ -73,38 +61,13 @@ model = BertForSequenceClassification.from_pretrained(
 model.load_state_dict(torch.load("model_last_version.pt"))
 model.to(device)
 model.eval()
+with torch.no_grad():
+    logit = model(tokens_tensor,
+                  token_type_ids=segments_tensors,  # single-sentence segment ids (all zeros)
+                  attention_mask=None)              # None -> attend to every token

-# Tracking variables
-predictions, true_labels = [], []
-
-# Predict
-for batch in prediction_dataloader:
-    # Add batch to GPU
-    batch = tuple(t.to(device) for t in batch)
-
-    # Unpack the inputs from our dataloader
-    b_input_ids, b_input_mask, b_labels = batch
-
-    # Telling the model not to compute or store gradients, saving memory and
-    # speeding up prediction
-    with torch.no_grad():
-        # Forward pass, calculate logit predictions
-        outputs = model(b_input_ids, token_type_ids=None,
-                        attention_mask=b_input_mask)
-
-    logits = outputs[0]
-
-    # Move logits and labels to CPU
-    logits = logits.detach().cpu().numpy()
-    label_ids = b_labels.to('cpu').numpy()
-
-    # Store predictions and true labels
-    predictions.append(logits)
-    true_labels.append(label_ids)
-
-flat_predictions = [item for sublist in predictions for item in sublist]
-flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
-
+logit_new = logit[0].argmax(-1).detach().cpu().numpy().tolist()  # logits have shape (1, 44)
+prediction = logit_new[0]

 # Creating an instance of LabelEncoder.
 le = LabelEncoder()
@@ -112,7 +75,7 @@ le = LabelEncoder()

 # from transformers import pipeline
 # pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
-raw_predictions = le.inverse_transform(flat_predictions)  # pipe(text)
+raw_predictions = le.inverse_transform([prediction])  # pipe(text)
 # here is the huggingface.transformers code you already know -- it can be swapped for anything from fairseq to catboost

 st.markdown(f"{raw_predictions}")
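
Below is a minimal sketch of the inference path this commit leaves in app.py, assembled into one self-contained, runnable piece. Several names are assumptions, not part of the diff: the tokenizer construction is not visible here, so BertTokenizer.from_pretrained("bert-base-uncased") is a guess consistent with the model; st.text_input is an assumed source of `text`; and "classes.npy" is a hypothetical file for restoring label names, since the app instantiates LabelEncoder() without fitting it, and scikit-learn's inverse_transform raises NotFittedError on an unfitted encoder.

import numpy as np
import streamlit as st
import torch
from sklearn.preprocessing import LabelEncoder
from transformers import BertForSequenceClassification, BertTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assumed tokenizer; app.py builds one before line 44, outside this diff.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=44,  # 44 target classes, as in the commit
)
model.load_state_dict(torch.load("model_last_version.pt", map_location=device))
model.to(device)
model.eval()

# The commit creates an unfitted LabelEncoder; restoring saved classes is one
# way to make inverse_transform work ("classes.npy" is hypothetical).
le = LabelEncoder()
le.classes_ = np.load("classes.npy", allow_pickle=True)

text = st.text_input("Text to classify")  # assumed source of `text`
if text:
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(str(text)))
    tokens_tensor = torch.tensor([indexed_tokens]).to(device)

    with torch.no_grad():
        logits = model(tokens_tensor).logits  # shape (1, 44)

    prediction = int(logits.argmax(-1).item())
    st.markdown(f"{le.inverse_transform([prediction])[0]}")

One pitfall worth noting: an all-zero vector passed as attention_mask would mask out every token and produce garbage logits. Segment ids belong in token_type_ids (for single-sentence input they are all zeros anyway), and for unpadded input the attention mask can be left as None, which BERT treats as all ones.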