Spaces:

kambris
/

SoLProject

Runtime error

App Files Community

kambris committed on Nov 7, 2024

Commit

b3d1640

verified ·

1 Parent(s): fa7767f

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -10

app.py CHANGED Viewed

@@ -1,13 +1,25 @@
 import streamlit as st
 import pandas as pd
-from sklearn.feature_extraction.text import CountVectorizer
-from transformers import pipeline
 from bertopic import BERTopic
-# Emotion classification pipeline (can use AraBERT or any emotion classifier)
-emotion_classifier = pipeline("text-classification", model="arpanghoshal/bert-base-uncased-emotion")
-# Function to process CSV file and return emotion and topic model
 def process_file(uploaded_file):
     # Load CSV
     df = pd.read_csv(uploaded_file)
@@ -19,13 +31,14 @@ def process_file(uploaded_file):
     # Preprocess the text: assuming the CSV has a 'text' column
     texts = df['text'].dropna().tolist()  # Modify this according to your column name
-    # Emotion Classification: Classify emotions for each text
     emotions = [emotion_classifier(text)[0]['label'] for text in texts]
     df['emotion'] = emotions
-    # Topic Modeling using BERTopic (install bertopic first if not installed)
     topic_model = BERTopic()
-    topics, _ = topic_model.fit_transform(texts)
     df['topic'] = topics
     # Display the results
@@ -35,8 +48,8 @@ def process_file(uploaded_file):
     return df
 # Streamlit App
-st.title("Topic Modeling & Emotion Classification")
-st.write("Upload a CSV file to perform topic modeling and emotion classification on the text.")
 # File upload widget
 uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])

 import streamlit as st
 import pandas as pd
+from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
 from bertopic import BERTopic
+import torch
# Streamlit re-executes this script on every widget interaction; caching the
# loaded objects avoids re-reading both checkpoints on each rerun.
@st.cache_resource
def _load_models():
    """Load the AraT5 tokenizer/model and the Arabic emotion classifier.

    Returns:
        A ``(tokenizer, model, emotion_classifier)`` tuple.
    """
    arat5_tokenizer = T5Tokenizer.from_pretrained("UBC-NLP/araT5-base")
    arat5_model = T5ForConditionalGeneration.from_pretrained("UBC-NLP/araT5-base")
    # NOTE(review): confirm this model id is available on the Hub.
    classifier = pipeline("text-classification", model="d0r13n/ara-bert-base-arabic-emotion")
    return arat5_tokenizer, arat5_model, classifier


# ARAT5 tokenizer and model (used for topic-modeling embeddings) and the
# Arabic emotion classification pipeline, exposed under their original names.
tokenizer, model, emotion_classifier = _load_models()
+# Function to get embeddings from ARAT5 for topic modeling
def generate_embeddings(texts, batch_size=16):
    """Return one mean-pooled ARAT5 encoder embedding per input text.

    Args:
        texts: Iterable of strings to embed.
        batch_size: Number of texts encoded per forward pass. Encoding in
            batches keeps memory bounded for large files.

    Returns:
        A 2-D NumPy array with one row per text, in input order. For empty
        input the array has zero rows.
    """
    texts = list(texts)
    if not texts:
        # The tokenizer cannot handle an empty batch; return an empty matrix.
        return torch.empty((0, model.config.d_model)).numpy()

    pooled_batches = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        # Tokenize the Arabic text for ARAT5
        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
        attention_mask = inputs['attention_mask']
        with torch.no_grad():
            outputs = model.encoder(
                input_ids=inputs['input_ids'],
                attention_mask=attention_mask,
            )
        hidden = outputs.last_hidden_state
        # Average over real tokens only, so padding added for the longest
        # text in the batch does not change a shorter text's embedding.
        weights = attention_mask.unsqueeze(-1).to(hidden.dtype)
        token_counts = weights.sum(dim=1).clamp(min=1)
        pooled_batches.append((hidden * weights).sum(dim=1) / token_counts)

    return torch.cat(pooled_batches, dim=0).numpy()
+# Function to process the CSV file and return emotion and topic model
 def process_file(uploaded_file):
     """Classify emotions and assign topics for the ``text`` column of a CSV.

     Args:
         uploaded_file: Path or file-like object accepted by ``pd.read_csv``.

     Returns:
         The loaded DataFrame with two added columns, ``emotion`` and
         ``topic``. Rows whose ``text`` is missing are kept and receive
         missing values in both columns.

     Raises:
         KeyError: If the CSV has no ``text`` column.
     """
     # Load CSV
     df = pd.read_csv(uploaded_file)

     # Preprocess the text: the CSV is expected to have a 'text' column
     # (modify this according to your column name).
     if 'text' not in df.columns:
         raise KeyError("The uploaded CSV must contain a 'text' column.")

     # Keep the surviving rows' index so results can be aligned back to df
     # even when rows with missing text were dropped.
     documents = df['text'].dropna().astype(str)
     texts = documents.tolist()

     if not texts:
         # Nothing to classify or cluster; return the frame with empty columns.
         df['emotion'] = pd.Series(index=df.index, dtype=object)
         df['topic'] = pd.Series(index=df.index, dtype=float)
         return df

     # Emotion Classification: classify emotions for each text (Arabic).
     # truncation=True keeps inputs within the classifier's maximum length.
     emotions = [emotion_classifier(text, truncation=True)[0]['label'] for text in texts]
     df['emotion'] = pd.Series(emotions, index=documents.index)

     # Topic Modeling using ARAT5 embeddings. BERTopic takes the documents as
     # its first argument and the precomputed vectors via ``embeddings``.
     embeddings = generate_embeddings(texts)
     topic_model = BERTopic()
     topics, _ = topic_model.fit_transform(texts, embeddings=embeddings)
     df['topic'] = pd.Series(topics, index=documents.index)

     return df
 # Streamlit App
+st.title("Arabic Topic Modeling & Emotion Classification with ARAT5")
+st.write("Upload a CSV file to perform topic modeling and emotion classification on Arabic text.")
 # File upload widget
 uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])