NaimaAqeel committed on
Commit
60d9162
·
verified ·
1 Parent(s): ffef3e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -9
app.py CHANGED
@@ -60,19 +60,59 @@ def generate_with_gpt(prompt, max_length=150):
60
  )
61
  return gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)
62
 
63
- def refine_answer_with_gpt(context, question, initial_answer):
64
- prompt = f"""
65
- Based on the following context, refine the answer to make it more clear and complete:
 
 
 
 
66
 
67
- Context: {context}
 
 
68
 
69
- Question: {question}
 
70
 
71
- Initial Answer: {initial_answer}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- Improved Answer:
74
- """
75
- return generate_with_gpt(prompt)
76
 
77
  def extract_direct_definition(text, term):
78
  """Try to find a sentence that directly defines the term"""
 
60
  )
61
  return gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)
62
 
63
# Terms that get a dedicated definition lookup, checked in priority order
# (the first term found in the question wins, as in the original chain).
_DEFINITION_TERMS = ("artificial system", "natural system", "component")

# Cosine similarity below which the single best chunk is considered too weak
# and the context is widened to the top-scoring chunks instead.
_LOW_SIMILARITY = 0.3
# Number of chunks joined into the context when similarity is low.
_FALLBACK_TOP_K = 3
# Minimum QA-pipeline score for an extracted span to be accepted.
_MIN_QA_SCORE = 0.1


def _answer_by_semantic_search(question, chunks):
    """Return an answer extracted from the most similar chunk(s), or None.

    Embeds the chunks and the question, picks the best-scoring chunk as the
    context (or the top few joined together when the best score is low), and
    runs the QA pipeline over it. The span is returned only when the pipeline
    score is above the threshold and the answer has at least two words.
    """
    emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
    emb_question = embedder.encode(question, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(emb_question, emb_chunks)[0]

    best_idx = scores.argmax().item()
    context = chunks[best_idx]

    if scores[best_idx] < _LOW_SIMILARITY:
        # Low confidence: give the QA model a wider context.
        top_k = min(_FALLBACK_TOP_K, len(chunks))
        best_indices = scores.topk(top_k).indices.tolist()
        context = " ".join(chunks[i] for i in best_indices)

    result = qa_pipeline(question=question, context=context)
    if result["score"] > _MIN_QA_SCORE and len(result["answer"].split()) >= 2:
        return result["answer"]
    return None


def ask_question(file, question, history):
    """Answer *question* from the uploaded *file* and record it in *history*.

    Args:
        file: The uploaded document; passed unchanged to ``extract_text``.
        question: The user's question text.
        history: List of ``(question, answer)`` tuples. It is appended to in
            place; ``None`` is treated as an empty history.

    Returns:
        A ``(message, history)`` tuple. On validation failure (no file, blank
        question, no extractable text or chunks) ``message`` describes the
        problem and ``history`` is returned without a new entry. Otherwise
        ``message`` is ``""`` and the new ``(question, answer)`` pair has been
        appended to ``history``.
    """
    # A fresh session may hand us None; append() below must not crash on it.
    if history is None:
        history = []

    if not file:
        return "Please upload a file.", history

    # Reject blank questions up front instead of letting the models fail and
    # a raw exception string end up in the chat history.
    if not question or not question.strip():
        return "Please enter a question.", history

    text = extract_text(file)
    if not text:
        return "Could not extract text from the file.", history

    chunks = chunk_text(text)
    if not chunks:
        return "No meaningful text chunks could be created.", history

    try:
        # Normalize question for better matching.
        normalized_question = question.lower().strip(" ?")

        # First try to find a direct definition for a known term.
        term = next(
            (t for t in _DEFINITION_TERMS if t in normalized_question), None
        )
        answer = extract_direct_definition(text, term) if term else None

        # If no direct definition was found, fall back to semantic search.
        if not answer:
            answer = _answer_by_semantic_search(question, chunks)

        # Final fallback if no answer was found.
        if not answer:
            answer = "Sorry, I couldn't find a clear answer in the document."
    except Exception as e:
        # UI boundary: surface the failure in the chat rather than crashing.
        answer = f"An error occurred: {str(e)}"

    history.append((question, answer))
    return "", history
 
116
 
117
  def extract_direct_definition(text, term):
118
  """Try to find a sentence that directly defines the term"""