Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -60,19 +60,59 @@ def generate_with_gpt(prompt, max_length=150):
|
|
60 |
)
|
61 |
return gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
62 |
|
63 |
-
def
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
|
|
|
|
|
68 |
|
69 |
-
|
|
|
70 |
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
-
|
74 |
-
""
|
75 |
-
return generate_with_gpt(prompt)
|
76 |
|
77 |
def extract_direct_definition(text, term):
|
78 |
"""Try to find a sentence that directly defines the term"""
|
|
|
60 |
)
|
61 |
return gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
62 |
|
63 |
+
def ask_question(file, question, history):
    """Answer a question about an uploaded document.

    Extracts text from *file*, first tries keyword-triggered direct
    definitions, then falls back to semantic search over text chunks
    plus an extractive QA model.

    Args:
        file: Uploaded file object/path accepted by ``extract_text``;
            falsy when no file was provided.
        question: The user's question as a string.
        history: Mutable list of (question, answer) chat pairs; the new
            pair is appended in place.

    Returns:
        A 2-tuple ``("", history)`` — the empty string clears the input
        box in the UI; failures are reported as answer text in *history*
        rather than raised.
    """
    if not file:
        return "Please upload a file.", history

    text = extract_text(file)
    if not text:
        return "Could not extract text from the file.", history

    chunks = chunk_text(text)
    if not chunks:
        return "No meaningful text chunks could be created.", history

    answer = None

    try:
        # Normalize for case-insensitive keyword matching.
        normalized_question = question.lower().strip(" ?")

        # Keyword-triggered direct definitions. Only the FIRST matching
        # term is tried (preserves the original elif-chain semantics:
        # later terms are not consulted even if the lookup yields None).
        for term in ("artificial system", "natural system", "component"):
            if term in normalized_question:
                answer = extract_direct_definition(text, term)
                break

        # If no direct definition found, fall back to semantic search.
        if not answer:
            emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
            emb_question = embedder.encode(question, convert_to_tensor=True)
            scores = util.pytorch_cos_sim(emb_question, emb_chunks)[0]
            best_idx = scores.argmax().item()
            best_chunk = chunks[best_idx]

            # Low confidence in the single best chunk: widen the QA
            # context to the top-k chunks joined together.
            if scores[best_idx] < 0.3:
                top_k = min(3, len(chunks))
                best_indices = scores.topk(top_k).indices.tolist()
                best_chunk = " ".join(chunks[i] for i in best_indices)

            result = qa_pipeline(question=question, context=best_chunk)
            # Reject low-score or one-word answers as unreliable.
            if result["score"] > 0.1 and len(result["answer"].split()) >= 2:
                answer = result["answer"]

        # Final fallback if no answer found.
        if not answer:
            answer = "Sorry, I couldn't find a clear answer in the document."

    except Exception as e:
        # Surface failures in the chat instead of crashing the UI.
        answer = f"An error occurred: {e}"

    history.append((question, answer))
    return "", history
|
|
|
116 |
|
117 |
def extract_direct_definition(text, term):
|
118 |
"""Try to find a sentence that directly defines the term"""
|