SatAT committed
Commit 3c6b6f2 · 1 Parent(s): 32b477a

Update app.py
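
Replaces the padded-batch prediction pipeline (pad_sequences, hand-built attention masks, TensorDataset + SequentialSampler + DataLoader) with a direct single-text forward pass: the input is tokenized, converted to an id tensor, run through the model under torch.no_grad(), and the argmax class index is decoded back to a label via LabelEncoder.inverse_transform.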

Files changed (1): app.py (+12, -49)
app.py CHANGED
@@ -44,25 +44,13 @@ encoded_sent = tokenizer.encode(
     #max_length = 128,       # Truncate all sentences.
     #return_tensors = 'pt',  # Return pytorch tensors.
 )
-# Add the encoded sentence to the list.
-test_input_ids.append(encoded_sent)
-test_input_ids = pad_sequences(test_input_ids, maxlen=MAX_LEN,
-                               dtype="long", truncating="post", padding="post")
-# Create attention masks
-attention_masks = []
-
-# Create a mask of 1s for each token followed by 0s for padding
-for seq in test_input_ids:
-    seq_mask = [float(i > 0) for i in seq]
-    attention_masks.append(seq_mask)
-
-# Convert to tensors.
-prediction_inputs = torch.tensor(test_input_ids)
-prediction_masks = torch.tensor(attention_masks)
-prediction_data = TensorDataset(prediction_inputs, prediction_masks, [])
-prediction_sampler = SequentialSampler(prediction_data)
-prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=1)
-# Put model in evaluation mode
+#tkns = tokenized_sub_sentence
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(str(text)))  # le.convert_tokens_to_ids(tkns)
+segments_ids = [0] * len(indexed_tokens)
+
+tokens_tensor = torch.tensor([indexed_tokens]).to(device)
+segments_tensors = torch.tensor([segments_ids]).to(device)
+
 model = BertForSequenceClassification.from_pretrained(
     "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
     num_labels = 44,      # The number of output labels: 44 classes here (2 would mean binary classification).
@@ -73,38 +61,13 @@ model = BertForSequenceClassification.from_pretrained(
 model.load_state_dict(torch.load("model_last_version.pt"))
 model.to(device)
 model.eval()
+with torch.no_grad():
+    logit = model(tokens_tensor,
+                  token_type_ids=segments_tensors,  # single-sentence segment ids (all zeros)
+                  attention_mask=None)              # None -> attend to every token

-# Tracking variables
-predictions, true_labels = [], []
-
-# Predict
-for batch in prediction_dataloader:
-    # Add batch to GPU
-    batch = tuple(t.to(device) for t in batch)
-
-    # Unpack the inputs from our dataloader
-    b_input_ids, b_input_mask, b_labels = batch
-
-    # Telling the model not to compute or store gradients, saving memory and
-    # speeding up prediction
-    with torch.no_grad():
-        # Forward pass, calculate logit predictions
-        outputs = model(b_input_ids, token_type_ids=None,
-                        attention_mask=b_input_mask)
-
-    logits = outputs[0]
-
-    # Move logits and labels to CPU
-    logits = logits.detach().cpu().numpy()
-    label_ids = b_labels.to('cpu').numpy()
-
-    # Store predictions and true labels
-    predictions.append(logits)
-    true_labels.append(label_ids)
-
-flat_predictions = [item for sublist in predictions for item in sublist]
-flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
-
+logit_new = logit[0].argmax(-1).detach().cpu().numpy().tolist()  # logits have shape (1, 44)
+prediction = logit_new[0]

 # Creating an instance of LabelEncoder.
 le = LabelEncoder()
@@ -112,7 +75,7 @@ le = LabelEncoder()

 # from transformers import pipeline
 # pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
-raw_predictions = le.inverse_transform(flat_predictions)  # pipe(text)
+raw_predictions = le.inverse_transform([prediction])  # pipe(text)
 # here is the huggingface.transformers code you already know -- it can be swapped for anything from fairseq to catboost

 st.markdown(f"{raw_predictions}")
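
Below is a minimal sketch of the inference path this commit leaves in app.py, assembled into one self-contained, runnable piece. Several names are assumptions, not part of the diff: the tokenizer construction is not visible here, so BertTokenizer.from_pretrained("bert-base-uncased") is a guess consistent with the model; st.text_input is an assumed source of `text`; and "classes.npy" is a hypothetical file for restoring label names, since the app instantiates LabelEncoder() without fitting it, and scikit-learn's inverse_transform raises NotFittedError on an unfitted encoder.

import numpy as np
import streamlit as st
import torch
from sklearn.preprocessing import LabelEncoder
from transformers import BertForSequenceClassification, BertTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assumed tokenizer; app.py builds one before line 44, outside this diff.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=44,  # 44 target classes, as in the commit
)
model.load_state_dict(torch.load("model_last_version.pt", map_location=device))
model.to(device)
model.eval()

# The commit creates an unfitted LabelEncoder; restoring saved classes is one
# way to make inverse_transform work ("classes.npy" is hypothetical).
le = LabelEncoder()
le.classes_ = np.load("classes.npy", allow_pickle=True)

text = st.text_input("Text to classify")  # assumed source of `text`
if text:
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(str(text)))
    tokens_tensor = torch.tensor([indexed_tokens]).to(device)

    with torch.no_grad():
        logits = model(tokens_tensor).logits  # shape (1, 44)

    prediction = int(logits.argmax(-1).item())
    st.markdown(f"{le.inverse_transform([prediction])[0]}")

One pitfall worth noting: an all-zero vector passed as attention_mask would mask out every token and produce garbage logits. Segment ids belong in token_type_ids (for single-sentence input they are all zeros anyway), and for unpadded input the attention mask can be left as None, which BERT treats as all ones.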