Spaces:

kambris
/

SoLProject

Runtime error

File size: 2,256 Bytes

4b4bf72
 
b3d1640
4b4bf72
b3d1640
4b4bf72
b3d1640
 
 
4b4bf72
b3d1640
 
 
 
 
 
 
 
 
 
 
 
4b4bf72
 
 
 
 
 
 
 
 
 
 
b3d1640
4b4bf72
 
 
b3d1640
 
4b4bf72
b3d1640
4b4bf72
 
 
 
 
 
 
 
 
b3d1640
 
4b4bf72

import streamlit as st
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
from bertopic import BERTopic
import torch

# Streamlit re-executes this script from top to bottom on every user
# interaction. Loading the models through a cached loader means the weights
# are read once per server process instead of on every rerun.
@st.cache_resource
def _load_models():
    """Load the ARAT5 tokenizer/model and the Arabic emotion classifier.

    Returns:
        A ``(tokenizer, model, emotion_classifier)`` tuple:
        the ARAT5 tokenizer and model used to embed texts for topic
        modeling, and a ``text-classification`` pipeline used for emotion
        labels.
    """
    # ARAT5 model and tokenizer for topic modeling
    arat5_tokenizer = T5Tokenizer.from_pretrained("UBC-NLP/araT5-base")
    arat5_model = T5ForConditionalGeneration.from_pretrained("UBC-NLP/araT5-base")

    # Emotion classification pipeline for Arabic
    classifier = pipeline("text-classification", model="d0r13n/ara-bert-base-arabic-emotion")
    return arat5_tokenizer, arat5_model, classifier


# Keep the original module-level names so the rest of the file is unaffected.
tokenizer, model, emotion_classifier = _load_models()

# Function to get embeddings from ARAT5 for topic modeling
def generate_embeddings(texts, batch_size=32):
    """Embed texts with the ARAT5 encoder using masked mean pooling.

    Args:
        texts: A single string or a sequence of strings to embed.
        batch_size: Number of texts encoded per forward pass. Batching keeps
            memory bounded for large inputs.

    Returns:
        A NumPy array with one row per input text. For empty input an array
        of shape ``(0, model.config.d_model)`` is returned.
    """
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        # Nothing to encode; return an empty matrix with the right width.
        return torch.empty((0, model.config.d_model)).numpy()

    pooled_batches = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            # Tokenize the Arabic text for ARAT5
            inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
            attention_mask = inputs["attention_mask"]
            # Pass the mask so real tokens do not attend to padding.
            outputs = model.encoder(input_ids=inputs["input_ids"], attention_mask=attention_mask)
            hidden = outputs.last_hidden_state

            # Average only over real tokens; otherwise padding vectors leak
            # into the mean and the embedding of a text depends on the
            # longest text that happens to share its batch.
            mask = attention_mask.unsqueeze(-1).to(hidden.dtype)
            token_counts = mask.sum(dim=1).clamp(min=1)  # guard against divide-by-zero
            pooled_batches.append((hidden * mask).sum(dim=1) / token_counts)

    return torch.cat(pooled_batches, dim=0).numpy()

# Function to process the CSV file and return emotion and topic model
def process_file(uploaded_file):
    """Classify emotions and assign topics for each text row of a CSV.

    The CSV is expected to have a ``text`` column. Rows whose text is missing
    are dropped (with a warning shown in the app) so that the computed
    labels stay aligned with the rows of the returned frame.

    Args:
        uploaded_file: A path or file-like object accepted by
            ``pandas.read_csv``.

    Returns:
        The loaded DataFrame, restricted to rows with text, with added
        ``emotion`` and ``topic`` columns.

    Raises:
        KeyError: If the CSV has no ``text`` column.
    """
    # Load CSV
    df = pd.read_csv(uploaded_file)

    # Display basic info about the CSV
    st.write("CSV Loaded Successfully!")
    st.write(f"Data Preview: {df.head()}")

    # Preprocess the text: assuming the CSV has a 'text' column
    if "text" not in df.columns:
        raise KeyError("The uploaded CSV must contain a 'text' column.")

    # Remove rows without text from the frame itself. Dropping them only from
    # the list of texts would make the label lists shorter than the frame and
    # the column assignments below would fail with a length mismatch.
    missing = df["text"].isna()
    if missing.any():
        st.warning(f"Skipped {int(missing.sum())} row(s) with missing text.")
        df = df.loc[~missing].copy()
    texts = df["text"].astype(str).tolist()

    if not texts:
        # Neither the encoder nor the topic model can run on zero documents.
        st.warning("No text rows found to analyze.")
        df["emotion"] = []
        df["topic"] = []
        return df

    # Emotion Classification: classify emotions for each text (Arabic).
    # Truncate so that long texts do not exceed the model's input limit.
    emotions = [
        emotion_classifier(text, truncation=True, max_length=512)[0]["label"]
        for text in texts
    ]
    df["emotion"] = emotions

    # Topic Modeling using ARAT5 embeddings. BERTopic takes the documents as
    # its first argument; precomputed vectors go in via ``embeddings=``.
    # NOTE(review): BERTopic's default pipeline may still fail on very small
    # inputs — confirm the minimum document count for this deployment.
    embeddings = generate_embeddings(texts)
    topic_model = BERTopic()
    topics, _ = topic_model.fit_transform(texts, embeddings=embeddings)
    df["topic"] = topics

    # Display the results
    st.write("Emotions classified for each entry:")
    st.write(df[["text", "emotion", "topic"]])

    return df

# --- Streamlit page: heading, upload widget, and analysis trigger ---
st.title("Arabic Topic Modeling & Emotion Classification with ARAT5")
st.write("Upload a CSV file to perform topic modeling and emotion classification on Arabic text.")

# CSV upload control; the code below treats a None result as "no file yet".
uploaded_file = st.file_uploader(label="Choose a CSV file", type=["csv"])

# Run the analysis only once a file has actually been provided.
if uploaded_file is not None:
    result_df = process_file(uploaded_file=uploaded_file)