Spaces:

huriacane33
/

QA

Sleeping

App Files Files Community

huriacane33 committed on Dec 20, 2024

Commit

09ded81

verified ·

1 Parent(s): 7b1acfd

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -21

app.py CHANGED Viewed

@@ -2,51 +2,63 @@ import streamlit as st
 from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 import pandas as pd
-# Memuat model dan tokenizer IndoBERT
 @st.cache_resource
-def load_indobert_model():
-    model_name = "indobenchmark/indobert-base-p1"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForQuestionAnswering.from_pretrained(model_name)
     return pipeline("question-answering", model=model, tokenizer=tokenizer)
-qa_pipeline = load_indobert_model()
-# Memuat dataset SOP
 @st.cache_data
 def load_sop_dataset():
-    return pd.read_csv("dataset.csv")
 dataset = load_sop_dataset()
-# Fungsi untuk menemukan konteks terbaik
 def find_best_context(question, dataset):
     best_score = 0
     best_context = None
-    for _, row in dataset.iterrows():
-        overlap = len(set(question.lower().split()) & set(row["text"].lower().split()))
         if overlap > best_score:
             best_score = overlap
-            best_context = row["text"]
     return best_context
-# Antarmuka Streamlit
-st.title("Sistem Penjawab Pertanyaan SOP dengan IndoBERT")
-st.markdown("Ajukan pertanyaan seputar Prosedur Operasional Standar:")
-question = st.text_area("Masukkan pertanyaan Anda:", "")
-if st.button("Dapatkan Jawaban"):
     if question:
-        with st.spinner("Mencari konteks yang relevan..."):
             context = find_best_context(question, dataset)
             if context:
-                with st.spinner("Menjawab pertanyaan Anda..."):
                     result = qa_pipeline(question=question, context=context)
-                    st.success("Jawaban:")
                     st.write(result["answer"])
-                    st.write("Skor Keyakinan:", result["score"])
             else:
-                st.warning("Konteks yang relevan tidak ditemukan. Silakan coba pertanyaan lain.")
     else:
-        st.warning("Silakan masukkan pertanyaan.")

 from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
 import pandas as pd
+# Load the Question Answering model
 @st.cache_resource
+def load_qa_pipeline():
+    """Load the QA pipeline with deepset/roberta-base-squad2 model."""
+    model_name = "deepset/roberta-base-squad2"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForQuestionAnswering.from_pretrained(model_name)
     return pipeline("question-answering", model=model, tokenizer=tokenizer)
+qa_pipeline = load_qa_pipeline()
+# Load SOP Dataset
 @st.cache_data
 def load_sop_dataset():
+    """Load SOP dataset from CSV."""
+    return pd.read_csv("dataset.csv")  # Ensure this file is uploaded to your Hugging Face Space
 dataset = load_sop_dataset()
+# Utility function to find the most relevant context
 def find_best_context(question, dataset):
+    """Find the single best context for a given question."""
     best_score = 0
     best_context = None
+    for index, row in dataset.iterrows():
+        # Access the 'text' column in the row
+        context_text = row['text']
+        # Simple heuristic: Count the number of overlapping words
+        overlap = len(set(question.lower().split()) & set(context_text.lower().split()))
         if overlap > best_score:
             best_score = overlap
+            best_context = context_text
     return best_context
+# Streamlit UI
+st.title("SOP Question Answering AI")
+st.markdown("Ask any question about Standard Operating Procedures:")
+# User input
+question = st.text_area("Enter your question:", "")
+# Generate answer
+if st.button("Get Answer"):
     if question:
+        with st.spinner("Finding the best context..."):
+            # Automatically find the most relevant context
             context = find_best_context(question, dataset)
             if context:
+                with st.spinner("Answering your question..."):
                     result = qa_pipeline(question=question, context=context)
+                    st.success("Answer:")
                     st.write(result["answer"])
+                    st.write("Confidence Score:", result["score"])
             else:
+                st.warning("No relevant context found. Please try rephrasing your question.")
     else:
+        st.warning("Please enter a question.")