from typing import Optional

import pandas as pd
import streamlit as st
from transformers import pipeline


# Load the Question Answering model
@st.cache_resource
def load_qa_pipeline():
    """Load the QA pipeline with Rifky/Indobert-QA model.

    Wrapped in ``st.cache_resource`` so the pipeline is built once instead of
    on every Streamlit script rerun.

    Returns:
        The ``question-answering`` pipeline object created by
        ``transformers.pipeline``.
    """
    return pipeline(
        "question-answering",
        model="Rifky/Indobert-QA",
        tokenizer="Rifky/Indobert-QA",
    )


qa_pipeline = load_qa_pipeline()


# Load SOP Dataset
@st.cache_data
def load_sop_dataset():
    """Load SOP dataset from CSV.

    Reads ``dataset.csv`` from the current working directory; the lookup in
    ``find_best_context`` reads its ``text`` column.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    # Ensure this file is uploaded to your Hugging Face Space
    return pd.read_csv("dataset.csv")


dataset = load_sop_dataset()


# Utility function to find the most relevant context
def find_best_context(question: str, dataset: pd.DataFrame) -> Optional[str]:
    """Find the single best context for a given question.

    A row's score is the number of distinct lowercase, whitespace-separated
    tokens that its ``text`` value shares with the question. The row with the
    highest score wins; on ties the earliest row is kept.

    Args:
        question: The user's question.
        dataset: DataFrame whose ``text`` column holds the candidate contexts.

    Returns:
        The best-matching context string, or ``None`` when no row shares at
        least one token with the question.
    """
    # The question does not change between rows, so tokenize it once.
    question_tokens = set(question.lower().split())

    best_score = 0
    best_context = None
    for _, row in dataset.iterrows():
        context_text = row['text']
        # Empty CSV cells are read as float NaN (and cells may be numeric);
        # those have no .lower() and cannot serve as a QA context anyway.
        if not isinstance(context_text, str):
            continue
        overlap = len(question_tokens & set(context_text.lower().split()))
        # Strict ">" keeps the first row among equally scored candidates and
        # leaves best_context as None when nothing overlaps at all.
        if overlap > best_score:
            best_score = overlap
            best_context = context_text
    return best_context


# Streamlit UI
st.title("Sistem Penjawab Pertanyaan SOP dengan IndoBERT")
st.markdown("Ajukan pertanyaan seputar Prosedur Operasional Standar:")

# User input
question = st.text_area("Masukkan pertanyaan Anda:", "")

# Generate answer
if st.button("Dapatkan Jawaban"):
    # Treat whitespace-only input as empty so the user gets the
    # "please enter a question" hint rather than "no context found".
    if question.strip():
        with st.spinner("Menemukan konteks yang paling relevan..."):
            context = find_best_context(question, dataset)
        if context:
            with st.spinner("Menjawab pertanyaan Anda..."):
                result = qa_pipeline(question=question, context=context)
            st.success("Jawaban:")
            st.write(result["answer"])
            st.write("Skor Keyakinan:", result["score"])
        else:
            st.warning("Konteks yang relevan tidak ditemukan. Silakan coba pertanyaan lain.")
    else:
        st.warning("Silakan masukkan pertanyaan.")