mohd43 committed on
Commit
245b7fc
·
verified ·
1 Parent(s): 10db6b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -4
app.py CHANGED
@@ -1,11 +1,67 @@
1
- from transformers import pipeline
 
2
  import gradio as gr
3
# Build the FinBERT ("ProsusAI/finbert") text-classification pipeline once,
# at import time, so later calls reuse the same object.
# NOTE(review): this assignment rebinds the name `pipeline`, so the factory
# imported from `transformers` is no longer reachable under that name.
- pipeline = pipeline("text-classification" , model = "ProsusAI/finbert")
 
 
 
4
 
5
  def predict(input_text):
6
- predictions = pipeline(input_text )
7
- return predictions[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
 
9
 
10
  gradio_app = gr.Interface(
11
  predict,
 
1
+ from transformers import BertForSequenceClassification, BertTokenizer
2
+ import torch
3
  import gradio as gr
4
# Checkpoint name shared by the tokenizer and the classification model, kept
# in one place so the two can never point at different checkpoints.
_FINBERT_CHECKPOINT = 'ProsusAI/finbert'
tokenizer = BertTokenizer.from_pretrained(_FINBERT_CHECKPOINT)
model = BertForSequenceClassification.from_pretrained(_FINBERT_CHECKPOINT)
6
+
7
+
8
 
9
  def predict(input_text):
10
+ tokens = tokenizer.encode_plus(input_text, add_special_tokens = False, return_tensors = 'pt')
11
+ input_id_chunks = tokens['input_ids'][0].split(510)
12
+ attention_mask_chunks = tokens['attention_mask'][0].split(510)
13
+ def get_input_ids_and_attention_mask_chunk():
14
+ """
15
+ This function splits the input_ids and attention_mask into chunks of size 'chunksize'.
16
+ It also adds special tokens (101 for [CLS] and 102 for [SEP]) at the start and end of each chunk.
17
+ If the length of a chunk is less than 'chunksize', it pads the chunk with zeros at the end.
18
+
19
+ Returns:
20
+ input_id_chunks (List[torch.Tensor]): List of chunked input_ids.
21
+ attention_mask_chunks (List[torch.Tensor]): List of chunked attention_masks.
22
+ """
23
+ chunksize = 512
24
+ input_id_chunks = list(tokens['input_ids'][0].split(chunksize - 2))
25
+ attention_mask_chunks = list(tokens['attention_mask'][0].split(chunksize - 2))
26
+
27
+ for i in range(len(input_id_chunks)):
28
+ input_id_chunks[i] = torch.cat([
29
+ torch.tensor([101]), input_id_chunks[i], torch.tensor([102])
30
+ ])
31
+
32
+ attention_mask_chunks[i] = torch.cat([
33
+ torch.tensor([1]), attention_mask_chunks[i], torch.tensor([1])
34
+ ])
35
+
36
+ pad_length = chunksize - input_id_chunks[i].shape[0]
37
+
38
+ if pad_length > 0:
39
+ input_id_chunks[i] = torch.cat([
40
+ input_id_chunks[i], torch.Tensor([0] * pad_length)
41
+ ])
42
+ attention_mask_chunks[i] = torch.cat([
43
+ attention_mask_chunks[i], torch.Tensor([0] * pad_length)
44
+ ])
45
+
46
+ return input_id_chunks, attention_mask_chunks
47
+ input_id_chunks, attention_mask_chunks = get_input_ids_and_attention_mask_chunk()
48
+ input_ids = torch.stack(input_id_chunks)
49
+ attention_mask = torch.stack(attention_mask_chunks)
50
+ input_dict = {
51
+ 'input_ids' : input_ids.long(),
52
+ 'attention_mask' : attention_mask.int()
53
+ }
54
+ outputs = model(**input_dict)
55
+ probabilities = torch.nn.functional.softmax(outputs[0], dim = -1 )
56
+ mean_probabilities = probabilities.mean(dim = 0)
57
+ output = torch.argmax(mean_probabilities).item()
58
+ if output==0:
59
+ return "positive"
60
+ elif output==1:
61
+ return "negative"
62
 
63
+ elif output==2 :
64
+ return "neutral"
65
 
66
  gradio_app = gr.Interface(
67
  predict,