# dhanikitkat's picture
# Update app.py
# 6b5839d verified
import streamlit as st
import re
import pandas as pd
from transformers import pipeline
from gensim.models import LdaModel
from gensim.corpora import Dictionary
# Function to preprocess text
def text_preprocess(teks):
    """Clean raw user text before it is passed to a model.

    Steps, in order: lowercase the text; replace @mentions and #hashtags
    with a space; replace literal backslash-n sequences; replace http(s) and
    "www." links; replace every character that is not an ASCII letter,
    whitespace or an apostrophe with a space; finally trim surrounding
    whitespace.

    Args:
        teks: The input string.

    Returns:
        The cleaned, lowercased string without leading or trailing
        whitespace. Runs of spaces inside the text are not collapsed.
    """
    teks = teks.lower()
    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)
    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)
    # This pattern matches a literal backslash followed by "n" (a newline
    # that arrived as two characters), not a real newline character.
    teks = re.sub(r"\\n", " ", teks)
    teks = re.sub(r"http\S+", " ", teks)
    # The dot is escaped so that only a real "www." prefix is removed. With
    # an unescaped dot, "awwww so cute" lost the word "so".
    teks = re.sub(r"www\.\S+", " ", teks)
    # Raw string: "\s" inside a normal string literal is an invalid escape.
    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
    # Strip last, because the substitutions above can leave spaces at the
    # edges of the text.
    return teks.strip()
# Function to perform inference and get the topic with the highest probability
def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
    """Return the name and probability of the most likely topic of a document.

    Args:
        lda_model: Object exposing ``get_document_topics(bow,
            minimum_probability=...)`` that yields ``(topic_id, probability)``
            pairs.
        dictionary: Object exposing ``doc2bow(tokens)``.
        new_document: Text to classify; it is tokenized by splitting on
            whitespace.
        topic_names: Mapping from topic id to a display name.

    Returns:
        A ``(topic_name, probability)`` tuple. When the winning topic id has
        no entry in ``topic_names``, the name falls back to
        ``"Topic <id>"``.
    """
    tokens = new_document.split()
    bag_of_words = dictionary.doc2bow(tokens)
    distribution = lda_model.get_document_topics(bag_of_words, minimum_probability=0)

    def score(entry):
        # Each entry is a (topic_id, probability) pair; rank by probability.
        return entry[1]

    best_id, best_score = max(distribution, key=score)
    label = topic_names.get(best_id, f"Topic {best_id}")
    return label, best_score
# Load sentiment analysis model
pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"

# Streamlit re-executes this script on every widget interaction, so the
# pipeline is built through a cached loader rather than re-created on each
# rerun. Streamlit versions without ``st.cache_resource`` fall back to the
# plain, uncached loader.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_sentiment_pipeline(model_name):
    """Build the sentiment-analysis pipeline for the given model name."""
    return pipeline("sentiment-analysis", model=model_name, tokenizer=model_name)


nlp = _load_sentiment_pipeline(pretrained_name)
# Display names for the LDA topic ids shown by the "Infer Topic" action.
TOPIC_NAMES = {
    0: 'Kurang Memuaskan',
    1: 'Aplikasi Lambat',
    2: 'Aplikasi Error',
    3: 'Sulit Sinkronisasi',
    4: 'Tidak Bisa Login',
    5: 'Aplikasi Sulit Dibuka',
    6: 'Aplikasi Keseringan Update',
    7: 'Neutral',
    8: 'Aplikasi Bug',
    9: 'Pelayanan Buruk',
    10: 'Aplikasi Tidak Bisa Digunakan',
    11: 'Aplikasi Belum Update',
    12: 'Aplikasi Bug/Lag',
    13: 'Sulit Komplain',
    14: 'Gangguan Server',
    15: 'Tidak Bisa Update',
    16: 'Tidak Bisa Download',
    17: 'Jaringan Bermasalah',
    18: 'Transaksi Lambat',
    19: 'Tidak Bisa Buka Aplikasi',
    20: 'Terlalu Banyak Iklan',
    21: 'Verifikasi Wajah Gagal',
    22: 'Pengajuan Pinjaman',
    23: 'Sms Kode Otp Tidak Masuk',
    24: 'Sulit Pengajuan Pinjaman',
    25: 'Tidak Bisa Transaksi / Lambat',
    26: 'Sulit Daftar',
    27: 'Sulit Transfer',
    28: 'Banyak Potongan',
    29: 'Tidak Bisa Cek Mutasi / Mutasi Hilang',
    30: 'Proses Kta Lama',
    31: 'Aplikasi Tidak Real Time',
    32: 'Kesulitan Pengajuan Kartu Kredit',
    33: 'Mesin Atm Error',
}


# Streamlit app
def main():
    """Render the page and run sentiment analysis or topic inference.

    The page shows a text area and two buttons. "Analyze Sentiment" runs the
    sentiment pipeline ``nlp`` on the cleaned text and prints the label and
    score. "Infer Topic" loads the LDA model and dictionary from
    ``lda.model`` / ``dictionary.dict`` and prints the most likely topic
    name and its probability. Blank input produces a warning instead of a
    model call.
    """
    st.title("Sentiment Analysis and Topic Inference App")
    st.write("Enter your text below:")
    input_text = st.text_area("Input Text")

    # Both actions use the same cleaned text, so they see identical input.
    processed_text = text_preprocess(input_text)
    has_text = bool(processed_text.strip())

    if st.button("Analyze Sentiment"):
        if has_text:
            result = nlp(processed_text)
            sentiment = result[0]['label']
            probability = result[0]['score']
            st.write("Sentiment:", sentiment)
            st.write("Probability:", probability)
        else:
            st.warning("Please enter some text to analyze.")

    if st.button("Infer Topic"):
        if has_text:
            lda_model = LdaModel.load("lda.model")
            dictionary = Dictionary.load("dictionary.dict")
            # NOTE(review): passing the cleaned text assumes the LDA
            # dictionary was built from text cleaned the same way; confirm
            # against the training code.
            inferred_topic, inferred_probability = get_highest_probability_topic(
                lda_model, dictionary, processed_text, TOPIC_NAMES
            )
            st.write("Inferred Topic:", inferred_topic)
            st.write("Inference Probability:", inferred_probability)
        else:
            st.warning("Please enter some text to analyze.")


if __name__ == "__main__":
    main()