SubinKrishna committed
Commit df3e003
Parent: dc48a90

files added

TBModel/config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "_name_or_path": "vinai/bertweet-base",
+   "architectures": [
+     "RobertaForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 130,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "tokenizer_class": "BertweetTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.27.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
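
This config records vinai/bertweet-base fine-tuned as a RoBERTa sequence classifier with three generic labels (LABEL_0, LABEL_1, LABEL_2), a 130-position embedding table, and a 64,001-token vocabulary. As a quick orientation, here is a minimal sketch of reading the checkpoint back from the TBModel/ directory added in this commit; the local path and the print statements are illustrative assumptions, not part of the committed code:

```python
# Minimal sketch (assumed local path "TBModel/"): load config and weights with transformers.
from transformers import AutoConfig, AutoModelForSequenceClassification

config = AutoConfig.from_pretrained("TBModel")                          # parses the JSON above
model = AutoModelForSequenceClassification.from_pretrained("TBModel")   # loads pytorch_model.bin

print(config.model_type, config.num_labels)   # roberta 3
print(config.id2label)                        # {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}
```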
TBModel/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eeaab45505d70a0b9ac82575b41bbbc81a758d4ee164500bb9943cf5beafab09
+ size 539679413
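
The weights themselves are stored with Git LFS, so the repository tracks only this pointer: the sha256 digest and the size (roughly 540 MB, consistent with a float32 checkpoint of this architecture). Below is a small sketch for verifying a fetched copy against the pointer; it assumes the real file has already been pulled (for example with `git lfs pull`) to TBModel/pytorch_model.bin:

```python
# Sketch: check a downloaded pytorch_model.bin against the LFS pointer above (path assumed).
import hashlib

EXPECTED_SHA256 = "eeaab45505d70a0b9ac82575b41bbbc81a758d4ee164500bb9943cf5beafab09"

digest = hashlib.sha256()
with open("TBModel/pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED_SHA256, "mismatch: the file may still be an LFS pointer"
```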
TBTokenizer/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "<mask>": 64000
+ }
TBTokenizer/bpe.codes ADDED
(contents not shown: the diff for this file is too large to render)
TBTokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
TBTokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "model_max_length": 128,
+   "normalization": true,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "BertweetTokenizer",
+   "unk_token": "<unk>"
+ }
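
Together with bpe.codes and vocab.txt, this configuration pins a BertweetTokenizer with tweet normalization enabled and a 128-token model_max_length. A minimal sketch of loading it from the TBTokenizer/ directory added here and encoding one example; the local path is an assumption, and BertweetTokenizer's normalization relies on the emoji package listed in requirements.txt:

```python
# Sketch (assumed local path "TBTokenizer/"): load the tokenizer and encode one sample text.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TBTokenizer")  # resolves to BertweetTokenizer

enc = tokenizer("ChatGPT can produce harmful and biased answers.",
                truncation=True, return_tensors="pt")
print(enc["input_ids"].shape)                 # (1, sequence_length), capped at 128 tokens
print(tokenizer.decode(enc["input_ids"][0]))  # round-trips the normalized text with special tokens
```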
TBTokenizer/vocab.txt ADDED
(contents not shown: the diff for this file is too large to render)
analyze.py ADDED
@@ -0,0 +1,94 @@
+ import streamlit as st
+ import pandas as pd
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import emoji
+
+ model_path = "ANLPRL/TBModel"
+ tokenizer_path = "ANLPRL/TBTokenizer"
+
+ # Load the tokenizer and model
+ model = AutoModelForSequenceClassification.from_pretrained(model_path)
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+
+ def predict(text):
+     encoded_data = tokenizer.encode_plus(text, padding=True, truncation=True, return_tensors='pt')
+     input_ids = encoded_data['input_ids']
+     attention_mask = encoded_data['attention_mask']
+     with torch.no_grad():
+         outputs = model(input_ids, attention_mask)
+         logits = outputs.logits
+     probabilities = torch.softmax(logits, dim=1)
+     _, predicted = torch.max(probabilities, dim=1)
+
+     # Create dictionary to map numerical labels to categories
+     label_dict = {0: 'Positive', 1: 'Negative', 2: 'Neutral'}
+     predicted_label = label_dict[predicted.item()]
+
+     return predicted_label
+
+ # Define examples as a list
+ examples = [
+     "ChatGPT Plus uses cutting-edge AI technology to learn from customer conversations.",
+     "ChatGPT can produce harmful and biased answers.",
+     "Gpt dont have feelings or a personal identity, but it strive to provide informative responses.",
+ ]
+
+ # Create the Streamlit app
+ emoji_dict = {
+     "positive": "\U0001F60A",
+     "negative": "\U0001F61E",
+     "neutral": "\U0001F610"
+ }
+
+ st.title("CHAT-GPT SENTIMENT ANALYSIS")
+
+ # Create the form to handle user inputs
+ with st.form("sentiment_analysis_form"):
+     # Add the dropdown list for examples
+     selected_option = st.selectbox("Select an example to analyze", [""] + examples, index=0)
+
+     # Add the text input for user input
+     user_input = st.text_input("Enter your own text to analyze", "")
+
+     # Define color codes for different sentiment classes
+     positive_color = "#00C851"
+     negative_color = "#ff4444"
+     neutral_color = "#FFBB33"
+
+     # Add the submit button to analyze the sentiment
+     analyze_button = st.form_submit_button("Analyze")
+
+ # Handle the form submission
+ if analyze_button:
+     if user_input.strip() != "":
+         prediction = predict(user_input.strip())
+         if prediction == 'Positive':
+             st.write(f"<span style='color:{positive_color}; font-weight:bold;'>{emoji_dict['positive']} Positive</span>", unsafe_allow_html=True)
+         elif prediction == 'Negative':
+             st.write(f"<span style='color:{negative_color}; font-weight:bold;'>{emoji_dict['negative']} Negative</span>", unsafe_allow_html=True)
+         else:
+             st.write(f"<span style='color:{neutral_color}; font-weight:bold;'>{emoji_dict['neutral']} Neutral</span>", unsafe_allow_html=True)
+     elif selected_option != "":
+         prediction = predict(selected_option)
+         if prediction == 'Positive':
+             st.write(f"<span style='color:{positive_color}; font-weight:bold;'>{emoji_dict['positive']} Positive</span>", unsafe_allow_html=True)
+         elif prediction == 'Negative':
+             st.write(f"<span style='color:{negative_color}; font-weight:bold;'>{emoji_dict['negative']} Negative</span>", unsafe_allow_html=True)
+         else:
+             st.write(f"<span style='color:{neutral_color}; font-weight:bold;'>{emoji_dict['neutral']} Neutral</span>", unsafe_allow_html=True)
+     else:
+         st.write("Please enter a text or select an example to predict")
+
+
+ st.markdown("""---""")
+ st.caption("""
+ Developed by Applied NLP Research Lab
+ School of Digital Sciences,
+ Kerala University of Digital Sciences, Innovation and Technology,
+ Technopark phase 4, Thiruvananthapuram, India |
+
+ <span style='text-align:center; display:block;'>
+ https://sites.google.com/duk.ac.in/anlprl
+ </span>
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ streamlit
+ pandas
+ torch
+ transformers
+ emoji
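
The listed packages cover the app's direct imports (streamlit, pandas, torch, transformers) plus emoji, which the Bertweet tokenizer's normalization depends on. For a local run, the usual sequence would be `pip install -r requirements.txt` followed by `streamlit run analyze.py`; how the hosted deployment names its entry point is not shown in this commit, so the exact launch command there is an assumption.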