Update app.py
app.py
CHANGED
@@ -44,25 +44,13 @@ encoded_sent = tokenizer.encode(
     #max_length = 128, # Truncate all sentences.
     #return_tensors = 'pt', # Return pytorch tensors.
 )
-#
-
-
-                          dtype="long", truncating="post", padding="post")
-# Create attention masks
-attention_masks = []
-
-
-
-    seq_mask = [float(i>0) for i in seq]
-    attention_masks.append(seq_mask)
-
-# Convert to tensors.
-prediction_inputs = torch.tensor(test_input_ids)
-prediction_masks = torch.tensor(attention_masks)
-prediction_data = TensorDataset(prediction_inputs, prediction_masks, [])
-prediction_sampler = SequentialSampler(prediction_data)
-prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=1)
-# Put model in evaluation mode
+#tkns = tokenized_sub_sentence
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(str(text)))  #le.convert_tokens_to_ids(tkns)
+segments_ids = [0] * len(indexed_tokens)
+
+tokens_tensor = torch.tensor([indexed_tokens]).to(device)
+segments_tensors = torch.tensor([segments_ids]).to(device)
+
 model = BertForSequenceClassification.from_pretrained(
     "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
     num_labels = 44,     # The number of output labels: 44 classes here (2 would mean binary classification).
@@ -73,38 +61,13 @@ model = BertForSequenceClassification.from_pretrained(
 model.load_state_dict(torch.load("model_last_version.pt"))
 model.to(device)
 model.eval()
-
-
-
-
-# Predict
-for batch in prediction_dataloader:
-    # Add batch to GPU
-    batch = tuple(t.to(device) for t in batch)
-
-    # Unpack the inputs from our dataloader
-    b_input_ids, b_input_mask, b_labels = batch
-
-    # Telling the model not to compute or store gradients, saving memory and
-    # speeding up prediction
-    with torch.no_grad():
-        # Forward pass, calculate logit predictions
-        outputs = model(b_input_ids, token_type_ids=None,
-                        attention_mask=b_input_mask)
-
-    logits = outputs[0]
-
-    # Move logits and labels to CPU
-    logits = logits.detach().cpu().numpy()
-    label_ids = b_labels.to('cpu').numpy()
-
-    # Store predictions and true labels
-    predictions.append(logits)
-    true_labels.append(label_ids)
-
-flat_predictions = [item for sublist in predictions for item in sublist]
-flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
-
+with torch.no_grad():
+    logit = model(tokens_tensor,
+                  token_type_ids=None,
+                  attention_mask=segments_tensors)
+
+logit_new = logit[0].argmax(2).detach().cpu().numpy().tolist()
+prediction = logit_new[0]

 # Creating an instance of LabelEncoder.
 le = LabelEncoder()
@@ -112,7 +75,7 @@ le = LabelEncoder()

 # from transformers import pipeline
 # pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
-raw_predictions = le.inverse_transform(
+raw_predictions = le.inverse_transform(prediction)  #pipe(text)
 # here is the huggingface.transformers code you already know -- it can be replaced with anything from fairseq to catboost

 st.markdown(f"{raw_predictions}")
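
After this commit, inference is a single forward pass: tokenize the input, map the tokens to vocabulary ids, wrap them in a batch-of-one tensor, run the model under torch.no_grad(), argmax the logits, and map the predicted id back to a class name with the fitted LabelEncoder. Below is a minimal self-contained sketch of that flow, not the app's exact code: it keeps the checkpoint and the 44 labels from the file above, but class_names is a hypothetical placeholder for the training label list, and it passes an explicit all-ones attention mask (the committed code reuses the all-zero segments_tensors as the mask, which would mask out every token). With the sequence-classification head, logits have shape (batch_size, num_labels), so the argmax is over the last dimension.

    import torch
    from transformers import BertTokenizer, BertForSequenceClassification
    from sklearn.preprocessing import LabelEncoder

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels=44,  # must match the fine-tuned checkpoint
    )
    model.load_state_dict(torch.load("model_last_version.pt", map_location=device))
    model.to(device)
    model.eval()

    le = LabelEncoder()
    le.fit(class_names)  # hypothetical: the same 44 class names used in training

    def predict_label(text):
        # Tokenize and map to vocabulary ids, as in the committed code
        # (note: no [CLS]/[SEP] special tokens are added on this path)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(str(text)))
        tokens_tensor = torch.tensor([indexed_tokens]).to(device)
        # Attend to every token: an all-ones mask (assumption; the commit
        # passes the all-zero segment ids here instead)
        attention_mask = torch.ones_like(tokens_tensor)

        with torch.no_grad():  # no gradients needed at inference time
            outputs = model(tokens_tensor,
                            token_type_ids=None,
                            attention_mask=attention_mask)

        logits = outputs[0]                            # shape: (1, 44)
        pred_id = logits.argmax(dim=-1).cpu().numpy()  # shape: (1,)
        return le.inverse_transform(pred_id)[0]        # class name as a string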
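
One caveat worth a sketch: logit[0].argmax(2) in the commit indexes a third logits dimension, which only exists for token-level heads with output shape (batch_size, seq_len, num_labels), matching the commented-out NER pipeline rather than BertForSequenceClassification, whose logits are (batch_size, num_labels). Continuing from the sketch above (tokenizer, device, and le are reused), a per-token variant would look roughly like this, assuming a hypothetical BertForTokenClassification checkpoint fine-tuned on the same 44 tags:

    from transformers import BertForTokenClassification

    # Hypothetical token-level head with the same label set
    ner_model = BertForTokenClassification.from_pretrained(
        "bert-base-uncased",
        num_labels=44,
    ).to(device)
    ner_model.eval()

    text = "Example input"  # placeholder input
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
    tokens_tensor = torch.tensor([indexed_tokens]).to(device)

    with torch.no_grad():
        logit = ner_model(tokens_tensor,
                          token_type_ids=None,
                          attention_mask=torch.ones_like(tokens_tensor))

    # logits: (batch_size, seq_len, num_labels) -> one label id per token
    logit_new = logit[0].argmax(2).detach().cpu().numpy().tolist()
    prediction = logit_new[0]                            # ids for the one input
    raw_predictions = le.inverse_transform(prediction)   # one tag per token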