zakihassan04 committed on
Commit
e896382
·
verified ·
1 Parent(s): 12431dc

Update app.py

Browse files
Files changed (1)
  1. app.py +10 -7
app.py CHANGED
@@ -26,25 +26,28 @@ class SomaliQA:
26
  return parts[0].replace("Su'aal:", "").strip(), parts[1].strip()
27
  return None, None
28
 
 
 
 
29
  def answer(self, user_question):
30
  if not user_question.strip().endswith("?"):
31
  user_question += "?"
32
 
33
- cleaned_question = user_question.strip().rstrip("?")
34
 
35
  # Step 1: Exact match
36
  for text in self.texts:
37
  su_aal, jawaab = self.extract_qa(text)
38
- if su_aal and cleaned_question.lower() == su_aal.lower():
39
- return jawaab # ✅ Return exact answer from dataset
40
 
41
  # Step 2: Semantic match
42
- user_emb = self.embedder.encode(cleaned_question, convert_to_tensor=True)
43
  hits = util.semantic_search(user_emb, self.embeddings, top_k=1)
44
  if hits and len(hits[0]) > 0:
45
  idx = hits[0][0]['corpus_id']
46
  su_aal, jawaab = self.extract_qa(self.texts[idx])
47
- return jawaab # ✅ Return answer from dataset (not generated)
48
 
49
  return "Ma helin jawaab ku habboon su’aashaada."
50
 
@@ -60,7 +63,7 @@ gr.Interface(
60
  fn=qa_interface,
61
  inputs="text",
62
  outputs="text",
63
- title="Somali QA Chatbot (Dataset-based)",
64
- description="Weydii su’aal la xiriirta beeralaydajawaabta waxa laga soo saaraa dataset-kaaga (GPT2 fine-tuned).",
65
  theme="compact"
66
  ).launch()
 
26
  return parts[0].replace("Su'aal:", "").strip(), parts[1].strip()
27
  return None, None
28
 
29
+ def clean_text(self, text):
30
+ return text.strip().lower().rstrip("?").replace("’", "'").replace(" ", " ")
31
+
32
  def answer(self, user_question):
33
  if not user_question.strip().endswith("?"):
34
  user_question += "?"
35
 
36
+ user_clean = self.clean_text(user_question)
37
 
38
  # Step 1: Exact match
39
  for text in self.texts:
40
  su_aal, jawaab = self.extract_qa(text)
41
+ if su_aal and user_clean == self.clean_text(su_aal):
42
+ return jawaab # ✅ Return exact dataset answer
43
 
44
  # Step 2: Semantic match
45
+ user_emb = self.embedder.encode(user_clean, convert_to_tensor=True)
46
  hits = util.semantic_search(user_emb, self.embeddings, top_k=1)
47
  if hits and len(hits[0]) > 0:
48
  idx = hits[0][0]['corpus_id']
49
  su_aal, jawaab = self.extract_qa(self.texts[idx])
50
+ return jawaab # ✅ Return semantically matched answer
51
 
52
  return "Ma helin jawaab ku habboon su’aashaada."
53
 
 
63
  fn=qa_interface,
64
  inputs="text",
65
  outputs="text",
66
+ title="Somali GPT-2 QA System (Dataset-based)",
67
+ description="Weydii su’aal ku saabsan beerahawaxaad helaysaa jawaab sax ah oo laga soo qaaday dataset-kaaga.",
68
  theme="compact"
69
  ).launch()