Spaces:

SatAT
/

transformer_service

Sleeping

App Files Files Community

SatAT committed on Apr 15, 2023

Commit

24a231f

1 Parent(s): a348a05

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -56

app.py CHANGED Viewed

@@ -14,68 +14,23 @@ st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVj
 text = st.text_area("TEXT HERE")
 # ^-- показать текстовое поле. В поле text лежит строка, которая находится там в данный момент
-if torch.cuda.is_available():
-    # Tell PyTorch to use the GPU.
-    device = torch.device("cuda")
-    print('There are %d GPU(s) available.' % torch.cuda.device_count())
-    print('We will use the GPU:', torch.cuda.get_device_name(0))
-# If not...
-else:
-    print('No GPU available, using the CPU instead.')
-    device = torch.device("cpu")
-# Set the maximum sequence length.
-# I've chosen 64 somewhat arbitrarily. It's slightly larger than the
-# maximum training sentence length of 47...
-MAX_LEN = 64
-tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-test_input_ids = []
-encoded_sent = tokenizer.encode(
-                    text,                      # Sentence to encode.
-                    add_special_tokens = True, # Add '[CLS]' and '[SEP]'
-                    # This function also supports truncation and conversion
-                    # to pytorch tensors, but we need to do padding, so we
-                    # can't use these features :( .
-                    #max_length = 128,          # Truncate all sentences.
-                    #return_tensors = 'pt',     # Return pytorch tensors.
-                )
-#tkns = tokenized_sub_sentence
-indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(str(text)))#le.convert_tokens_to_ids(tkns)
-segments_ids = [0] * len(indexed_tokens)
-tokens_tensor = torch.tensor([indexed_tokens])#.to(device)
-segments_tensors = torch.tensor([segments_ids])#.to(device)
-model = BertForSequenceClassification.from_pretrained(
     "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
-    num_labels = 44, # The number of output labels--2 for binary classification.
-                    # You can increase this for multi-class tasks.
-    output_attentions = False, # Whether the model returns attentions weights.
-    output_hidden_states = False, # Whether the model returns all hidden-states.
-)
-model.load_state_dict(torch.load("model_last_version.pt", map_location=torch.device('cpu')))
-# model.to(device)
-model.eval()
-with torch.no_grad():
-    logit = model(tokens_tensor,
-                  token_type_ids=None,
-                  attention_mask=segments_tensors)
-    logit_new = logit[0].argmax(2).detach().cpu().numpy().tolist()
-    prediction = logit_new[0]
-# Creating a instance of label Encoder.
-le = LabelEncoder()
-# print("Predict: ", le.inverse_transform(flat_predictions))
 # from transformers import pipeline
 # pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
-raw_predictions = le.inverse_transform(prediction)#pipe(text)
 # тут уже знакомый вам код с huggingface.transformers -- его можно заменить на что угодно от fairseq до catboost
 st.markdown(f"{raw_predictions}")

 text = st.text_area("TEXT HERE")
 # ^-- показать текстовое поле. В поле text лежит строка, которая находится там в данный момент
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+model = BertForSequenceClassification.from_pretrained(
     "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
+    num_labels = 44,)  # size the classification head for 44 output labels
+MAX_LEN = 64
+tokens = tokenizer.encode_plus(text, add_special_tokens=True, max_length=MAX_LEN, truncation=True, padding='max_length')
+input_ids = torch.tensor(tokens['input_ids']).unsqueeze(0)
+attention_mask = torch.tensor(tokens['attention_mask']).unsqueeze(0)
+logits = model(input_ids, attention_mask)[0]
+probs = torch.softmax(logits, dim=1)
+predicted_category = torch.argmax(probs).item()
 # from transformers import pipeline
 # pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
+raw_predictions = predicted_category#le.inverse_transform(prediction)#pipe(text)
 # тут уже знакомый вам код с huggingface.transformers -- его можно заменить на что угодно от fairseq до catboost
 st.markdown(f"{raw_predictions}")