SatAT committed on
Commit
24a231f
·
1 Parent(s): a348a05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -56
app.py CHANGED
@@ -14,68 +14,23 @@ st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVj
14
  text = st.text_area("TEXT HERE")
15
  # ^-- показать текстовое поле. В поле text лежит строка, которая находится там в данный момент
16
 
17
- if torch.cuda.is_available():
18
-
19
- # Tell PyTorch to use the GPU.
20
- device = torch.device("cuda")
21
-
22
- print('There are %d GPU(s) available.' % torch.cuda.device_count())
23
-
24
- print('We will use the GPU:', torch.cuda.get_device_name(0))
25
-
26
- # If not...
27
- else:
28
- print('No GPU available, using the CPU instead.')
29
- device = torch.device("cpu")
30
- # Set the maximum sequence length.
31
- # I've chosen 64 somewhat arbitrarily. It's slightly larger than the
32
- # maximum training sentence length of 47...
33
- MAX_LEN = 64
34
-
35
- tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
36
- test_input_ids = []
37
- encoded_sent = tokenizer.encode(
38
- text, # Sentence to encode.
39
- add_special_tokens = True, # Add '[CLS]' and '[SEP]'
40
-
41
- # This function also supports truncation and conversion
42
- # to pytorch tensors, but we need to do padding, so we
43
- # can't use these features :( .
44
- #max_length = 128, # Truncate all sentences.
45
- #return_tensors = 'pt', # Return pytorch tensors.
46
- )
47
- #tkns = tokenized_sub_sentence
48
- indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(str(text)))#le.convert_tokens_to_ids(tkns)
49
- segments_ids = [0] * len(indexed_tokens)
50
-
51
- tokens_tensor = torch.tensor([indexed_tokens])#.to(device)
52
- segments_tensors = torch.tensor([segments_ids])#.to(device)
53
-
54
- model = BertForSequenceClassification.from_pretrained(
55
  "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
56
- num_labels = 44, # The number of output labels--2 for binary classification.
57
- # You can increase this for multi-class tasks.
58
- output_attentions = False, # Whether the model returns attentions weights.
59
- output_hidden_states = False, # Whether the model returns all hidden-states.
60
- )
61
- model.load_state_dict(torch.load("model_last_version.pt", map_location=torch.device('cpu')))
62
- # model.to(device)
63
- model.eval()
64
- with torch.no_grad():
65
- logit = model(tokens_tensor,
66
- token_type_ids=None,
67
- attention_mask=segments_tensors)
68
 
69
- logit_new = logit[0].argmax(2).detach().cpu().numpy().tolist()
70
- prediction = logit_new[0]
 
 
71
 
72
- # Creating a instance of label Encoder.
73
- le = LabelEncoder()
74
- # print("Predict: ", le.inverse_transform(flat_predictions))
75
 
 
76
  # from transformers import pipeline
77
  # pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
78
- raw_predictions = le.inverse_transform(prediction)#pipe(text)
79
  # тут уже знакомый вам код с huggingface.transformers -- его можно заменить на что угодно от fairseq до catboost
80
 
81
  st.markdown(f"{raw_predictions}")
 
14
  text = st.text_area("TEXT HERE")
15
  # ^-- показать текстовое поле. В поле text лежит строка, которая находится там в данный момент
16
 
17
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
18
+ model = BertForSequenceClassification.from_pretrained(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
20
+ num_labels = 44,)
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ MAX_LEN = 64
23
+ tokens = tokenizer.encode_plus(text, add_special_tokens=True, max_length=MAX_LEN, truncation=True, padding='max_length')
24
+ input_ids = torch.tensor(tokens['input_ids']).unsqueeze(0)
25
+ attention_mask = torch.tensor(tokens['attention_mask']).unsqueeze(0)
26
 
27
+ logits = model(input_ids, attention_mask)[0]
28
+ probs = torch.softmax(logits, dim=1)
 
29
 
30
+ predicted_category = torch.argmax(probs).item()
31
  # from transformers import pipeline
32
  # pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
33
+ raw_predictions = predicted_category#le.inverse_transform(prediction)#pipe(text)
34
  # тут уже знакомый вам код с huggingface.transformers -- его можно заменить на что угодно от fairseq до catboost
35
 
36
  st.markdown(f"{raw_predictions}")