"""Streamlit app: sentiment analysis and LDA topic inference for app reviews."""

import re

import pandas as pd
import streamlit as st
from gensim.corpora import Dictionary
from gensim.models import LdaModel
from transformers import pipeline

# Human-readable labels for the LDA topic ids. Ids missing from this mapping
# are rendered as "Topic <id>" by get_highest_probability_topic.
TOPIC_NAMES = {
    0: 'Kurang Memuaskan',
    1: 'Aplikasi Lambat',
    2: 'Aplikasi Error',
    3: 'Sulit Sinkronisasi',
    4: 'Tidak Bisa Login',
    5: 'Aplikasi Sulit Dibuka',
    6: 'Aplikasi Keseringan Update',
    7: 'Neutral',
    8: 'Aplikasi Bug',
    9: 'Pelayanan Buruk',
    10: 'Aplikasi Tidak Bisa Digunakan',
    11: 'Aplikasi Belum Update',
    12: 'Aplikasi Bug/Lag',
    13: 'Sulit Komplain',
    14: 'Gangguan Server',
    15: 'Tidak Bisa Update',
    16: 'Tidak Bisa Download',
    17: 'Jaringan Bermasalah',
    18: 'Transaksi Lambat',
    19: 'Tidak Bisa Buka Aplikasi',
    20: 'Terlalu Banyak Iklan',
    21: 'Verifikasi Wajah Gagal',
    22: 'Pengajuan Pinjaman',
    23: 'Sms Kode Otp Tidak Masuk',
    24: 'Sulit Pengajuan Pinjaman',
    25: 'Tidak Bisa Transaksi / Lambat',
    26: 'Sulit Daftar',
    27: 'Sulit Transfer',
    28: 'Banyak Potongan',
    29: 'Tidak Bisa Cek Mutasi / Mutasi Hilang',
    30: 'Proses Kta Lama',
    31: 'Aplikasi Tidak Real Time',
    32: 'Kesulitan Pengajuan Kartu Kredit',
    33: 'Mesin Atm Error',
}


def _cache_resource(func):
    """Wrap *func* with ``st.cache_resource`` when this Streamlit has it.

    Streamlit re-executes the script on every widget interaction, so
    uncached loaders would rebuild the models on each rerun. On Streamlit
    versions without ``cache_resource`` the function is returned unchanged,
    which keeps the previous (uncached) behaviour.
    """
    decorator = getattr(st, "cache_resource", None)
    return decorator(func) if decorator is not None else func


# Function to preprocess text
def text_preprocess(teks: str) -> str:
    """Return *teks* lower-cased with mentions, hashtags, URLs and symbols removed.

    Every removed span is replaced by a single space, so words on either
    side of it stay separated. Only ASCII letters, whitespace and
    apostrophes survive the final filter.
    """
    teks = teks.lower()
    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)  # @mentions
    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)  # #hashtags
    # Matches a literal backslash followed by "n" (an escaped newline in the
    # text), not a real newline character.
    teks = re.sub(r"\\n", " ", teks)
    teks = re.sub(r"http\S+", " ", teks)
    # The dot is escaped so only "www." prefixes are treated as URLs.
    teks = re.sub(r"www\.\S+", " ", teks)
    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
    # Strip last: the substitutions above can leave padding at either end.
    return teks.strip()


# Function to perform inference and get the topic with the highest probability
def get_highest_probability_topic(lda_model, dictionary, new_document: str, topic_names: dict) -> tuple:
    """Return ``(topic_name, probability)`` for the most likely topic.

    Args:
        lda_model: Model exposing ``get_document_topics(bow, minimum_probability=...)``.
        dictionary: Object exposing ``doc2bow(tokens)``.
        new_document: Text to classify; it is tokenised on whitespace.
        topic_names: Mapping from topic id to display name. Ids that are
            missing fall back to ``"Topic <id>"``.

    Returns:
        The display name of the highest-probability topic and its probability.
    """
    new_bow = dictionary.doc2bow(new_document.split())
    topic_distribution = lda_model.get_document_topics(new_bow, minimum_probability=0)
    topic_id, probability = max(topic_distribution, key=lambda pair: pair[1])
    topic_name = topic_names.get(topic_id, f"Topic {topic_id}")
    return topic_name, probability


# Load sentiment analysis model
pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"


@_cache_resource
def _load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline for ``pretrained_name``."""
    return pipeline("sentiment-analysis", model=pretrained_name, tokenizer=pretrained_name)


@_cache_resource
def _load_topic_model():
    """Load the LDA model and its dictionary from the working directory."""
    return LdaModel.load("lda.model"), Dictionary.load("dictionary.dict")


nlp = _load_sentiment_pipeline()


# Streamlit app
def main():
    """Render the page and run sentiment or topic inference on demand."""
    st.title("Sentiment Analysis and Topic Inference App")
    st.write("Enter your text below:")

    input_text = st.text_area("Input Text")

    if st.button("Analyze Sentiment"):
        processed_text = text_preprocess(input_text)
        if not processed_text:
            # Nothing usable is left after cleaning, so skip the model call.
            st.warning("Please enter some text to analyze.")
        else:
            result = nlp(processed_text)
            sentiment = result[0]['label']
            probability = result[0]['score']
            st.write("Sentiment:", sentiment)
            st.write("Probability:", probability)

    if st.button("Infer Topic"):
        # Clean the text the same way as the sentiment path; raw casing and
        # punctuation would stop tokens matching the dictionary.
        # NOTE(review): assumes the dictionary was built from text cleaned in
        # a comparable way - confirm against the training pipeline.
        processed_text = text_preprocess(input_text)
        if not processed_text:
            st.warning("Please enter some text to analyze.")
        else:
            lda_model, dictionary = _load_topic_model()
            inferred_topic, inferred_probability = get_highest_probability_topic(
                lda_model, dictionary, processed_text, TOPIC_NAMES
            )
            st.write("Inferred Topic:", inferred_topic)
            st.write("Inference Probability:", inferred_probability)


if __name__ == "__main__":
    main()