mainakhf committed on
Commit
107a7e6
·
verified ·
1 Parent(s): c4f2e3f

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +27 -0
  2. process.py +55 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from process import preprocess_text, Get_sentiment
3
+
4
def analyze_sentiment(text):
    """Clean the raw text and classify it.

    Passes ``text`` through ``preprocess_text`` and hands the cleaned
    string to ``Get_sentiment``; the result of that call is returned
    unchanged.
    """
    return Get_sentiment(preprocess_text(text))
9
+
10
+
11
def main():
    """Render the Streamlit page and run sentiment analysis on request.

    Shows a title, a prompt and a text area. When the "Analyze" button is
    pressed, the entered text is classified with ``analyze_sentiment`` and
    the first returned label is displayed; if nothing meaningful was
    entered, a prompt asking for text is shown instead.
    """
    st.title("Sentiment Analysis App")
    st.write("Enter text below for sentiment analysis:")

    # Text input
    text_input = st.text_area("Input Text")

    # Nothing to do until the user asks for an analysis.
    if not st.button("Analyze"):
        return

    # Whitespace-only input is truthy but carries no content: it would be
    # reduced to an empty string by preprocessing and still be classified.
    # Treat it the same as no input at all.
    if not text_input or not text_input.strip():
        st.write("Please enter some text.")
        return

    sentiment = analyze_sentiment(text_input)
    # analyze_sentiment returns one label per input; a single text was sent.
    st.write("Sentiment:", sentiment[0])


if __name__ == "__main__":
    main()
process.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.tokenize import word_tokenize
3
+ from nltk.corpus import stopwords
4
+ import string
5
+ from transformers import BertTokenizer, TFBertForSequenceClassification
6
+ import tensorflow as tf
7
+
8
# Download the NLTK resources used below. These calls run every time the
# module is imported.
nltk.download('punkt')
# NLTK 3.8.2 and later make word_tokenize load the 'punkt_tab' data instead
# of the pickled 'punkt' models; without it word_tokenize raises LookupError
# on current NLTK releases (requirements.txt does not pin a version).
nltk.download('punkt_tab')
nltk.download('stopwords')

# English stopwords and single-character ASCII punctuation marks, kept as
# sets for constant-time membership tests in preprocess_text.
stop_words = set(stopwords.words('english'))
punctuations = set(string.punctuation)
15
+
16
+ # Function to preprocess text
17
def preprocess_text(text):
    """Return a lowercased copy of ``text`` without stopwords or punctuation.

    The input is coerced to ``str``, lowercased and split with
    ``word_tokenize``. Tokens that appear in ``stop_words`` or in
    ``punctuations`` are discarded, and the remaining tokens are joined
    with single spaces.
    """
    lowered = str(text).lower()

    kept = []
    for word in word_tokenize(lowered):
        if word in stop_words or word in punctuations:
            continue
        kept.append(word)

    return ' '.join(kept)
28
+
29
# Raw string literals keep the Windows backslashes literal. Without the
# r-prefix, sequences such as "\j", "\i", "\T" and "\M" are invalid escapes
# that only work by accident and emit a SyntaxWarning on Python 3.12+.
# The resulting path values are unchanged.
# NOTE(review): these are absolute paths on a local drive; the directories
# must exist on whatever machine imports this module.
bert_tokenizer = BertTokenizer.from_pretrained(r'E:\jupyter\internship assesment\Techdome\Tokenizer')

# Load model
bert_model = TFBertForSequenceClassification.from_pretrained(r'E:\jupyter\internship assesment\Techdome\Model')

# Maps a predicted class index to the sentiment label returned to callers.
label = {
    1: 'positive',
    0: 'Negative',
}
37
+
38
def Get_sentiment(Review, Tokenizer=bert_tokenizer, Model=bert_model):
    """Predict a sentiment label for each review.

    Args:
        Review: A single review, or a list of reviews. Anything that is
            not a ``list`` is wrapped in a one-element list.
        Tokenizer: Tokenizer used to encode the reviews; defaults to the
            module-level ``bert_tokenizer``.
        Model: Classifier whose ``predict`` output exposes ``logits``;
            defaults to the module-level ``bert_model``.

    Returns:
        A list with one entry of ``label`` per review, in input order.
    """
    reviews = Review if isinstance(Review, list) else [Review]

    encoded = Tokenizer.batch_encode_plus(
        reviews,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors='tf',
    )
    # Relies on the encoding yielding exactly three entries in this order.
    input_ids, token_type_ids, attention_mask = encoded.values()

    # NOTE(review): the tensors are passed positionally as
    # [input_ids, token_type_ids, attention_mask]. transformers TF models
    # document list inputs in signature order (input_ids, attention_mask,
    # token_type_ids) - confirm this matches how the model was trained
    # before reordering anything here.
    output = Model.predict([input_ids, token_type_ids, attention_mask])

    # Highest-scoring class per review, converted to plain Python ints so
    # they can be used as keys into ``label``.
    class_ids = tf.argmax(output.logits, axis=1).numpy().tolist()
    return [label[class_id] for class_id in class_ids]
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ tensorflow
2
+ transformers
3
+ scikit-learn
4
+ streamlit
5
+ nltk