huriacane33 committed on
Commit
09ded81
·
verified ·
1 Parent(s): 7b1acfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -21
app.py CHANGED
@@ -2,51 +2,63 @@ import streamlit as st
2
  from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
3
  import pandas as pd
4
 
5
- # Memuat model dan tokenizer IndoBERT
6
  @st.cache_resource
7
- def load_indobert_model():
8
- model_name = "indobenchmark/indobert-base-p1"
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
10
  model = AutoModelForQuestionAnswering.from_pretrained(model_name)
11
  return pipeline("question-answering", model=model, tokenizer=tokenizer)
12
 
13
- qa_pipeline = load_indobert_model()
14
 
15
- # Memuat dataset SOP
16
  @st.cache_data
17
  def load_sop_dataset():
18
- return pd.read_csv("dataset.csv")
 
19
 
20
  dataset = load_sop_dataset()
21
 
22
- # Fungsi untuk menemukan konteks terbaik
23
  def find_best_context(question, dataset):
 
24
  best_score = 0
25
  best_context = None
26
- for _, row in dataset.iterrows():
27
- overlap = len(set(question.lower().split()) & set(row["text"].lower().split()))
 
 
 
 
28
  if overlap > best_score:
29
  best_score = overlap
30
- best_context = row["text"]
 
31
  return best_context
32
 
33
- # Antarmuka Streamlit
34
- st.title("Sistem Penjawab Pertanyaan SOP dengan IndoBERT")
35
- st.markdown("Ajukan pertanyaan seputar Prosedur Operasional Standar:")
36
 
37
- question = st.text_area("Masukkan pertanyaan Anda:", "")
 
38
 
39
- if st.button("Dapatkan Jawaban"):
 
40
  if question:
41
- with st.spinner("Mencari konteks yang relevan..."):
 
42
  context = find_best_context(question, dataset)
 
43
  if context:
44
- with st.spinner("Menjawab pertanyaan Anda..."):
45
  result = qa_pipeline(question=question, context=context)
46
- st.success("Jawaban:")
47
  st.write(result["answer"])
48
- st.write("Skor Keyakinan:", result["score"])
49
  else:
50
- st.warning("Konteks yang relevan tidak ditemukan. Silakan coba pertanyaan lain.")
51
  else:
52
- st.warning("Silakan masukkan pertanyaan.")
 
2
  from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
3
  import pandas as pd
4
 
5
# Question-answering model setup
@st.cache_resource
def load_qa_pipeline():
    """Build the extractive question-answering pipeline.

    Loads the tokenizer and then the model weights for the
    ``deepset/roberta-base-squad2`` checkpoint and wraps both in a
    ``question-answering`` pipeline. The function is decorated with
    ``st.cache_resource``.

    Returns:
        The pipeline object returned by ``transformers.pipeline``.
    """
    checkpoint = "deepset/roberta-base-squad2"
    # Keyword arguments are evaluated left to right, so the tokenizer is
    # still fetched before the model.
    return pipeline(
        "question-answering",
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        model=AutoModelForQuestionAnswering.from_pretrained(checkpoint),
    )


qa_pipeline = load_qa_pipeline()
15
 
16
# SOP dataset loading
@st.cache_data
def load_sop_dataset():
    """Read the SOP dataset from ``dataset.csv``.

    The path is given without a directory, so it is resolved against the
    process's current working directory. The function is decorated with
    ``st.cache_data``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    # Ensure this file is uploaded to your Hugging Face Space
    sop_frame = pd.read_csv("dataset.csv")
    return sop_frame


dataset = load_sop_dataset()
23
 
24
# Utility function to find the most relevant context
def find_best_context(question, dataset):
    """Return the dataset text that shares the most words with ``question``.

    Relevance is a simple heuristic: both the question and each candidate
    text are lower-cased and split on whitespace, and the score is the
    number of distinct words they have in common. Punctuation is not
    stripped, so ``"incidents?"`` and ``"incidents"`` count as different
    words.

    Args:
        question: The user's question as a string.
        dataset: A pandas DataFrame with a ``text`` column holding the
            candidate contexts.

    Returns:
        The ``text`` value with the highest overlap, or ``None`` when no
        row shares at least one word with the question (including when the
        dataset is empty). On a tie the earliest row wins.

    Raises:
        KeyError: If ``dataset`` has no ``text`` column.
    """
    # The question's word set is loop-invariant; build it once.
    question_words = set(question.lower().split())
    best_score = 0
    best_context = None

    # Only the 'text' column is needed, so iterate it directly instead of
    # materialising a Series per row with iterrows().
    for context_text in dataset["text"]:
        # Empty CSV cells are read by pandas as NaN (a float), which has no
        # .lower(); skip anything that is not real text instead of crashing.
        if not isinstance(context_text, str):
            continue
        overlap = len(question_words & set(context_text.lower().split()))
        # Strict '>' keeps the first of several equally good rows and never
        # selects a row with zero overlap.
        if overlap > best_score:
            best_score = overlap
            best_context = context_text

    return best_context
40
 
41
# Streamlit UI: page header, question input, and the answer workflow.
st.title("SOP Question Answering AI")
st.markdown("Ask any question about Standard Operating Procedures:")

# User input
question = st.text_area("Enter your question:", "")

# Generate answer
if st.button("Get Answer"):
    # Strip first so whitespace-only input is treated as "no question"
    # instead of falling through to the misleading "no context" warning.
    cleaned_question = question.strip()
    if cleaned_question:
        with st.spinner("Finding the best context..."):
            # Automatically find the most relevant context
            context = find_best_context(cleaned_question, dataset)

        if context:
            with st.spinner("Answering your question..."):
                result = qa_pipeline(question=cleaned_question, context=context)
                st.success("Answer:")
                st.write(result["answer"])
                st.write("Confidence Score:", result["score"])
        else:
            # find_best_context returned nothing usable for this question.
            st.warning("No relevant context found. Please try rephrasing your question.")
    else:
        st.warning("Please enter a question.")