kambris committed on
Commit
b3d1640
·
verified ·
1 Parent(s): fa7767f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -10
app.py CHANGED
@@ -1,13 +1,25 @@
1
  import streamlit as st
2
  import pandas as pd
3
- from sklearn.feature_extraction.text import CountVectorizer
4
- from transformers import pipeline
5
  from bertopic import BERTopic
 
6
 
7
- # Emotion classification pipeline (can use AraBERT or any emotion classifier)
8
- emotion_classifier = pipeline("text-classification", model="arpanghoshal/bert-base-uncased-emotion")
 
9
 
10
- # Function to process CSV file and return emotion and topic model
 
 
 
 
 
 
 
 
 
 
 
11
  def process_file(uploaded_file):
12
  # Load CSV
13
  df = pd.read_csv(uploaded_file)
@@ -19,13 +31,14 @@ def process_file(uploaded_file):
19
  # Preprocess the text: assuming the CSV has a 'text' column
20
  texts = df['text'].dropna().tolist() # Modify this according to your column name
21
 
22
- # Emotion Classification: Classify emotions for each text
23
  emotions = [emotion_classifier(text)[0]['label'] for text in texts]
24
  df['emotion'] = emotions
25
 
26
- # Topic Modeling using BERTopic (install bertopic first if not installed)
 
27
  topic_model = BERTopic()
28
- topics, _ = topic_model.fit_transform(texts)
29
  df['topic'] = topics
30
 
31
  # Display the results
@@ -35,8 +48,8 @@ def process_file(uploaded_file):
35
  return df
36
 
37
  # Streamlit App
38
- st.title("Topic Modeling & Emotion Classification")
39
- st.write("Upload a CSV file to perform topic modeling and emotion classification on the text.")
40
 
41
  # File upload widget
42
  uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
 
1
  import streamlit as st
2
  import pandas as pd
3
+ from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
 
4
  from bertopic import BERTopic
5
+ import torch
6
 
7
+ # Initialize ARAT5 model and tokenizer for topic modeling
8
+ tokenizer = T5Tokenizer.from_pretrained("UBC-NLP/araT5-base")
9
+ model = T5ForConditionalGeneration.from_pretrained("UBC-NLP/araT5-base")
10
 
11
+ # Emotion classification pipeline for Arabic (use an Arabic emotion classification model)
12
+ emotion_classifier = pipeline("text-classification", model="d0r13n/ara-bert-base-arabic-emotion")
13
+
14
+ # Function to get embeddings from ARAT5 for topic modeling
15
+ def generate_embeddings(texts):
16
+ # Tokenize the Arabic text for ARAT5
17
+ inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
18
+ with torch.no_grad():
19
+ outputs = model.encoder(input_ids=inputs['input_ids'])
20
+ return outputs.last_hidden_state.mean(dim=1).numpy()
21
+
22
+ # Function to process the CSV file and return emotion and topic model
23
  def process_file(uploaded_file):
24
  # Load CSV
25
  df = pd.read_csv(uploaded_file)
 
31
  # Preprocess the text: assuming the CSV has a 'text' column
32
  texts = df['text'].dropna().tolist() # Modify this according to your column name
33
 
34
+ # Emotion Classification: Classify emotions for each text (Arabic)
35
  emotions = [emotion_classifier(text)[0]['label'] for text in texts]
36
  df['emotion'] = emotions
37
 
38
+ # Topic Modeling using ARAT5 embeddings
39
+ embeddings = generate_embeddings(texts)
40
  topic_model = BERTopic()
41
+ topics, _ = topic_model.fit_transform(embeddings)
42
  df['topic'] = topics
43
 
44
  # Display the results
 
48
  return df
49
 
50
  # Streamlit App
51
+ st.title("Arabic Topic Modeling & Emotion Classification with ARAT5")
52
+ st.write("Upload a CSV file to perform topic modeling and emotion classification on Arabic text.")
53
 
54
  # File upload widget
55
  uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])