Spaces:

kambris
/

SoLProject

Runtime error

App Files Files Community

SoLProject / app.py

kambris

Update app.py

b3d1640 verified 8 months ago

raw

history blame

2.26 kB

	import streamlit as st
	import pandas as pd
	from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
	from bertopic import BERTopic
	import torch

	# Initialize ARAT5 model and tokenizer for topic modeling.
	# Streamlit re-executes this script on every user interaction, so the
	# loaders are wrapped in st.cache_resource to avoid reloading the models
	# on each rerun.
	@st.cache_resource
	def _load_arat5():
	    """Load and return the (tokenizer, model) pair for ARAT5."""
	    arat5_tokenizer = T5Tokenizer.from_pretrained("UBC-NLP/araT5-base")
	    arat5_model = T5ForConditionalGeneration.from_pretrained("UBC-NLP/araT5-base")
	    return arat5_tokenizer, arat5_model


	# Emotion classification pipeline for Arabic (use an Arabic emotion classification model)
	@st.cache_resource
	def _load_emotion_classifier():
	    """Load and return the text-classification pipeline used for emotions."""
	    # NOTE(review): confirm this model id is available on the Hugging Face Hub.
	    return pipeline("text-classification", model="d0r13n/ara-bert-base-arabic-emotion")


	tokenizer, model = _load_arat5()
	emotion_classifier = _load_emotion_classifier()

	# Function to get embeddings from ARAT5 for topic modeling
	def generate_embeddings(texts, batch_size=16):
	    """Return one mean-pooled ARAT5 encoder embedding per input text.

	    Args:
	        texts: A list of strings (a single string is treated as a
	            one-element list).
	        batch_size: Number of texts encoded per forward pass, so memory use
	            does not grow with the total number of texts.

	    Returns:
	        A 2-D NumPy array with one row per text; zero rows for empty input.
	    """
	    if isinstance(texts, str):
	        texts = [texts]
	    if len(texts) == 0:
	        # Nothing to tokenize; return an empty matrix of the encoder width.
	        return torch.empty((0, model.config.d_model)).numpy()

	    pooled_batches = []
	    for start in range(0, len(texts), batch_size):
	        batch = texts[start:start + batch_size]
	        # Tokenize the Arabic text for ARAT5
	        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
	        with torch.no_grad():
	            # Pass the attention mask so padding positions are not attended to.
	            outputs = model.encoder(
	                input_ids=inputs['input_ids'],
	                attention_mask=inputs['attention_mask'],
	            )
	        hidden = outputs.last_hidden_state
	        # Average over real tokens only; padding would otherwise dilute the
	        # embedding of every text shorter than the longest one in the batch.
	        mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
	        token_counts = mask.sum(dim=1).clamp(min=1)
	        pooled_batches.append((hidden * mask).sum(dim=1) / token_counts)

	    return torch.cat(pooled_batches, dim=0).numpy()

	# Function to process the CSV file and return emotion and topic model
	def process_file(uploaded_file):
	    """Classify emotions and assign topics for the 'text' column of a CSV.

	    Args:
	        uploaded_file: A path or file-like object readable by ``pd.read_csv``.

	    Returns:
	        The loaded DataFrame, restricted to rows whose 'text' is not missing,
	        with 'emotion' and 'topic' columns added. If the 'text' column is
	        absent or contains no usable rows, a message is shown and the
	        DataFrame is returned without those columns.
	    """
	    # Load CSV
	    df = pd.read_csv(uploaded_file)

	    # Display basic info about the CSV
	    st.write("CSV Loaded Successfully!")
	    # Pass the frame itself so it renders as a table rather than as the
	    # plain-text repr embedded in a string.
	    st.write("Data Preview:", df.head())

	    # Preprocess the text: assuming the CSV has a 'text' column
	    if 'text' not in df.columns:
	        st.error("The CSV must contain a 'text' column.")
	        return df

	    # Drop rows with missing text from the frame itself so the per-text
	    # results below line up with its rows.
	    df = df.dropna(subset=['text']).reset_index(drop=True)
	    texts = df['text'].astype(str).tolist()  # Modify this according to your column name
	    if not texts:
	        st.warning("No non-empty rows found in the 'text' column.")
	        return df

	    # Emotion Classification: Classify emotions for each text (Arabic).
	    # truncation=True keeps long entries within the model's maximum length.
	    emotions = [emotion_classifier(text, truncation=True)[0]['label'] for text in texts]
	    df['emotion'] = emotions

	    # Topic Modeling using ARAT5 embeddings.
	    # BERTopic takes the documents first; precomputed vectors go in via the
	    # `embeddings` keyword.
	    # NOTE(review): BERTopic's default settings may not cope with very small
	    # corpora — confirm the minimum number of rows needed.
	    embeddings = generate_embeddings(texts)
	    topic_model = BERTopic()
	    topics, _ = topic_model.fit_transform(texts, embeddings=embeddings)
	    df['topic'] = topics

	    # Display the results
	    st.write("Emotions classified for each entry:")
	    st.write(df[['text', 'emotion', 'topic']])

	    return df

	# Streamlit App
	st.title("Arabic Topic Modeling & Emotion Classification with ARAT5")
	st.write("Upload a CSV file to perform topic modeling and emotion classification on Arabic text.")

	# File upload widget
	uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])

	if uploaded_file is not None:
	    # Process the file. Model inference can take a while, so show a spinner
	    # rather than leaving the page apparently idle.
	    with st.spinner("Processing file..."):
	        try:
	            result_df = process_file(uploaded_file)
	        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	            # A malformed upload is a user error; report it in the page
	            # instead of surfacing a raw traceback.
	            result_df = None
	            st.error(f"Could not read the uploaded CSV: {exc}")