Aman72321 committed on
Commit
206bb87
1 Parent(s): 58937e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -52
app.py CHANGED
@@ -1,22 +1,16 @@
1
- """
2
- Sentiment analysis pipeline for texts in multiple languages.
3
- """
4
-
5
  import gc
6
  from collections import defaultdict
7
- import lingua
8
- from transformers import pipeline
9
  import torch
 
10
  from lingua import Language, LanguageDetectorBuilder
11
 
12
-
13
  __version__ = "0.1.0"
14
 
15
  if torch.cuda.is_available():
16
- device_tag = 0 # first gpu
17
  else:
18
- device_tag = -1 # cpu
19
-
20
 
21
  default_models = {
22
  Language.ENGLISH: "lxyuan/distilbert-base-multilingual-cased-sentiments-student",
@@ -37,29 +31,19 @@ default_models = {
37
  language_detector = LanguageDetectorBuilder.from_all_languages().build()
38
 
39
 
40
-
41
- # Processing a batch:
42
- # Detect languages into a list and map to models
43
- # For each model, make a pipeline, make a list and process
44
  # inject into a list in the original order
45
-
46
  def split_message(message, max_length):
47
  """ Split a message into a list of chunks of given maximum size. """
48
- return [message[i: i+max_length] for i in range(0, len(message), max_length)]
49
 
50
 
51
- def process_messages_in_batches(
52
- messages_with_languages,
53
- models = None,
54
- max_length = 512
55
- ):
56
  """
57
  Process messages in batches, creating only one pipeline at a time, and maintain the original order.
58
-
59
  Params:
60
  messages_with_languages: list of tuples, each containing a message and its detected language
61
  models: dict, model paths indexed by Language
62
-
63
  Returns:
64
  OrderedDict: containing the index as keys and tuple of (message, sentiment result) as values
65
  """
@@ -81,7 +65,7 @@ def process_messages_in_batches(
81
  messages_by_model[model_name].append((index, message))
82
  else:
83
  results[index] = {"label": "none", "score": 0}
84
-
85
  # Process messages and maintain original order
86
  for model_name, batch in messages_by_model.items():
87
  sentiment_pipeline = pipeline(model=model_name, device=device_tag)
@@ -96,7 +80,7 @@ def process_messages_in_batches(
96
  message_map[idx].append(len(chunks) - 1)
97
  else:
98
  message_map[idx] = [len(chunks) - 1]
99
-
100
  chunk_sentiments = sentiment_pipeline(chunks)
101
 
102
  for idx, chunk_indices in message_map.items():
@@ -115,7 +99,7 @@ def process_messages_in_batches(
115
  # Force garbage collection to remove the model from memory
116
  del sentiment_pipeline
117
  gc.collect()
118
-
119
  # Unify common spellings of the labels
120
  for i in range(len(results)):
121
  results[i]["label"] = results[i]["label"].lower()
@@ -134,11 +118,11 @@ def sentiment(messages, models=None):
134
  the user can provide a model for a given language in the models
135
  dictionary. The keys for this dictionary are lingua.Language objects
136
  and items HuggingFace model paths.
137
-
138
  Params:
139
  messages: list of message strings
140
  models: dict, huggingface model paths indexed by lingua.Language
141
-
142
  Returns:
143
  OrderedDict: containing the index as keys and tuple of (message, sentiment result) as values
144
  """
@@ -147,26 +131,24 @@ def sentiment(messages, models=None):
147
  ]
148
 
149
  results = process_messages_in_batches(messages_with_languages, models)
150
- return results
151
-
152
-
153
- messages = [
154
- "I love this product! It's amazing!",
155
- "This movie was terrible. I regret watching it.",
156
- "今日はいい天気ですね。",
157
- "Je suis très content de votre service.",
158
- "Este restaurante tiene una comida deliciosa.",
159
- "خدمة المطعم كانت محبطة",
160
- "أنا سعيد"
161
- # Add more messages as needed
162
- ]
163
- results = sentiment(messages)
164
-
165
- # Analyze results
166
- for idx, result in enumerate(results):
167
- message = messages[idx]
168
- sentiment_label = result["label"]
169
- sentiment_score = result["score"]
170
- print(f"Message: {message}")
171
- print(f"Sentiment: {sentiment_label} (Score: {sentiment_score})")
172
- print()
 
1
+ import streamlit as st
 
 
 
2
  import gc
3
  from collections import defaultdict
 
 
4
  import torch
5
+ from transformers import pipeline
6
  from lingua import Language, LanguageDetectorBuilder
7
 
 
8
  __version__ = "0.1.0"
9
 
10
  if torch.cuda.is_available():
11
+ device_tag = 0 # first gpu
12
  else:
13
+ device_tag = -1 # cpu
 
14
 
15
  default_models = {
16
  Language.ENGLISH: "lxyuan/distilbert-base-multilingual-cased-sentiments-student",
 
31
  language_detector = LanguageDetectorBuilder.from_all_languages().build()
32
 
33
 
 
 
 
 
 
 
34
  def split_message(message, max_length):
35
  """ Split a message into a list of chunks of given maximum size. """
36
+ return [message[i: i + max_length] for i in range(0, len(message), max_length)]
37
 
38
 
39
+ def process_messages_in_batches(messages_with_languages, models=None, max_length=512):
 
 
 
 
40
  """
41
  Process messages in batches, creating only one pipeline at a time, and maintain the original order.
42
+
43
  Params:
44
  messages_with_languages: list of tuples, each containing a message and its detected language
45
  models: dict, model paths indexed by Language
46
+
47
  Returns:
48
  OrderedDict: containing the index as keys and tuple of (message, sentiment result) as values
49
  """
 
65
  messages_by_model[model_name].append((index, message))
66
  else:
67
  results[index] = {"label": "none", "score": 0}
68
+
69
  # Process messages and maintain original order
70
  for model_name, batch in messages_by_model.items():
71
  sentiment_pipeline = pipeline(model=model_name, device=device_tag)
 
80
  message_map[idx].append(len(chunks) - 1)
81
  else:
82
  message_map[idx] = [len(chunks) - 1]
83
+
84
  chunk_sentiments = sentiment_pipeline(chunks)
85
 
86
  for idx, chunk_indices in message_map.items():
 
99
  # Force garbage collection to remove the model from memory
100
  del sentiment_pipeline
101
  gc.collect()
102
+
103
  # Unify common spellings of the labels
104
  for i in range(len(results)):
105
  results[i]["label"] = results[i]["label"].lower()
 
118
  the user can provide a model for a given language in the models
119
  dictionary. The keys for this dictionary are lingua.Language objects
120
  and items HuggingFace model paths.
121
+
122
  Params:
123
  messages: list of message strings
124
  models: dict, huggingface model paths indexed by lingua.Language
125
+
126
  Returns:
127
  OrderedDict: containing the index as keys and tuple of (message, sentiment result) as values
128
  """
 
131
  ]
132
 
133
  results = process_messages_in_batches(messages_with_languages, models)
134
+ return results
135
+
136
+
137
def main():
    """
    Streamlit entry point for the sentiment analysis pipeline.

    Renders a title and a text area where the user enters one message per
    line. When the "Analyze Sentiments" button is pressed, the non-empty
    lines are passed to sentiment() and each message is shown together with
    its capitalized label and its score rounded to two decimals.
    """
    st.title("Sentiment Analysis Pipeline")
    messages_input = st.text_area("Enter your messages (one per line):", height=200)

    # Strip surrounding whitespace and drop blank lines so that every
    # remaining entry is a real message.
    messages = [line.strip() for line in messages_input.split('\n') if line.strip()]

    # Nothing to do until the user presses the button.
    if not st.button("Analyze Sentiments"):
        return

    # Avoid running the pipeline (and showing an empty "Results" header)
    # when the text area holds no usable message.
    if not messages:
        st.warning("Please enter at least one message to analyze.")
        return

    results = sentiment(messages)
    st.write("## Results:")
    # NOTE(review): assumes sentiment() returns one result per message, in
    # input order, each with "label" and "score" keys - confirm against
    # process_messages_in_batches.
    for message, result in zip(messages, results):
        sentiment_label = result["label"]
        sentiment_score = result["score"]
        st.write(f"**Message:** {message}")
        st.write(f"**Sentiment:** {sentiment_label.capitalize()} (Score: {sentiment_score:.2f})")


if __name__ == "__main__":
    main()