KojoKesse committed on
Commit
65ec70d
·
1 Parent(s): 15440b4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dependencies: transformers, datasets, streamlit.
# A leading "!" is IPython/notebook shell syntax and is a SyntaxError in a plain
# .py file, so install the packages from a shell (or a requirements file) instead:
#     pip install -q transformers datasets streamlit
2
+
3
+ from transformers import AutoModelForSequenceClassification
4
+ from transformers import TFAutoModelForSequenceClassification
5
+ from transformers import AutoTokenizer, AutoConfig
6
+ import numpy as np
7
+ from scipy.special import softmax
8
+
9
+
10
# Checkpoint used for tokenizer, config and classification head.
model_path = "avichr/heBERT_sentiment_analysis"

# The tokenizer is loaded from the same checkpoint as the model: token ids are
# only meaningful for the vocabulary the model was trained with, so pairing the
# model with another checkpoint's tokenizer feeds it unrelated ids.
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
15
+
16
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Mask user mentions and links in *text* with fixed placeholders.

    The text is split on single spaces. A token that starts with ``@`` and is
    longer than one character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``. All other tokens are kept unchanged, and the
    tokens are re-joined with single spaces.
    """
    def _mask(token):
        # A lone "@" is left untouched; only "@something" is a mention.
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))
24
+
25
# Input preprocessing
text = "Covid cases are increasing fast!"
text = preprocess(text)

# PyTorch-based models
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
# output[0] holds the logits; [0] selects the single sequence in the batch.
scores = output[0][0].detach().numpy()
scores = softmax(scores)

# TensorFlow-based models
# model = TFAutoModelForSequenceClassification.from_pretrained(model_path)
# model.save_pretrained(model_path)
# text = "Covid cases are increasing fast!"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

# Label names come from the checkpoint's own config.id2label rather than a
# hand-written mapping, so the printed names always follow the class order the
# model was trained with.
# NOTE(review): confirm the checkpoint's id2label against its model card.

# Print labels and scores, most probable class first
ranking = np.argsort(scores)[::-1]
print(f"Classified text: {text}")
for rank, class_id in enumerate(ranking, start=1):
    label = config.id2label[int(class_id)]
    print(f"{rank}) {label} {np.round(float(scores[class_id]), 4)}")
54
+
55
+ from transformers import AutoModelForSequenceClassification
56
+ from transformers import TFAutoModelForSequenceClassification
57
+ from transformers import AutoTokenizer, AutoConfig
58
+ from scipy.special import softmax
59
+ import streamlit as st
60
+
61
+
62
def preprocess(text):
    """Return *text* with mentions and URLs replaced by placeholders.

    Tokens are obtained by splitting on single spaces. Tokens beginning with
    ``http`` are replaced by ``http``; tokens beginning with ``@`` that have at
    least one more character are replaced by ``@user``. The result is the
    tokens joined back together with single spaces.
    """
    pieces = text.split(" ")
    for idx, piece in enumerate(pieces):
        # The two prefixes are mutually exclusive, so the order of the checks
        # does not affect the outcome.
        if piece.startswith('http'):
            pieces[idx] = 'http'
        elif piece.startswith('@') and len(piece) > 1:
            pieces[idx] = '@user'
    return " ".join(pieces)
69
+
70
+
71
@st.cache_resource
def _load_sentiment_model(model_path):
    """Load tokenizer, config and model for *model_path* once and reuse them.

    Cached with ``st.cache_resource`` so the checkpoint is not reloaded every
    time the analysis function is called.
    """
    # Tokenizer and model come from the same checkpoint so that the token ids
    # match the vocabulary the model was trained with.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    config = AutoConfig.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return tokenizer, config, model


def sentiment_analysis(text):
    """Classify the sentiment of *text*.

    The text is passed through ``preprocess`` and the sequence-classification
    model, and the logits are turned into probabilities with softmax.

    Args:
        text: The raw input string.

    Returns:
        A dict mapping each class label to its probability as a ``float``.
        Labels are read from the checkpoint's ``config.id2label`` (capitalised)
        instead of a hard-coded list, so they follow the model's own class
        order. NOTE(review): confirm the label names against the model card.
    """
    text = preprocess(text)

    # Load (or fetch from cache) the model artefacts
    tokenizer, config, model = _load_sentiment_model("avichr/heBERT_sentiment_analysis")

    # Encode text input; truncate so long user input cannot exceed the number
    # of positions the model supports.
    encoded_input = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=config.max_position_embeddings,
    )
    output = model(**encoded_input)
    # output[0] holds the logits; [0] selects the single sequence in the batch.
    logits = output[0][0].detach().numpy()

    # Calculate softmax probabilities
    probabilities = softmax(logits)

    # Format output dict of scores, keyed by the model's own label names
    return {
        str(config.id2label[class_id]).capitalize(): float(probability)
        for class_id, probability in enumerate(probabilities)
    }
93
+
94
+
95
+ import streamlit as st
96
+
97
st.title("Sentiment Analysis for Covid Feelings")

# User input field
text = st.text_input(label="Enter your text:")

# Perform sentiment analysis only once the user has entered something
if text:
    scores = sentiment_analysis(text)

    # Display sentiment scores
    st.subheader("Sentiment Scores")
    for label, score in scores.items():
        st.write(f"{label}: {score:.2f}")

# NOTE: the original trailing `st.run(.py)` was removed: it is a syntax error and
# not a Streamlit API. Start the app from a shell with `streamlit run app.py`.