Spaces:

NaimaAqeel
/

Chatbot

Runtime error

App Files Files Community

NaimaAqeel committed 24 days ago

Commit

2737463

verified ·

1 Parent(s): de6a22c

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -13

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import faiss
 import torch
 # ===============================
-# EMBEDDING MODEL SETUP
 # ===============================
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -21,15 +21,12 @@ def get_embeddings(texts):
     inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
     with torch.no_grad():
         outputs = embedding_model(**inputs)
-    embeddings = outputs.last_hidden_state[:, 0].cpu().numpy()
-    # Normalize embeddings to unit length for cosine similarity
-    embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
-    return embeddings
 # ===============================
 # TEXT CHUNKING
 # ===============================
-def chunk_text(text, chunk_size=500, overlap=50):
     chunks = []
     start = 0
     while start < len(text):
@@ -44,7 +41,7 @@ def chunk_text(text, chunk_size=500, overlap=50):
 index_path = "faiss_index.pkl"
 document_texts_path = "document_texts.pkl"
 document_texts = []
-embedding_dim = 384  # For all-MiniLM-L6-v2
 if os.path.exists(index_path) and os.path.exists(document_texts_path):
     try:
@@ -109,7 +106,7 @@ def upload_document(file):
 # ===============================
 qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
-def generate_answer_from_file(query, top_k=7):
     if not document_texts:
         return "No documents indexed yet."
@@ -118,20 +115,23 @@ def generate_answer_from_file(query, top_k=7):
     retrieved_chunks = [document_texts[i] for i in indices[0]]
     context = "\n\n".join(retrieved_chunks)
     prompt = (
-        "You are a helpful and precise assistant reading student notes or textbook passages.\n\n"
-        "Based on the context provided, answer the question accurately and in detail using full sentences.\n\n"
         "### Example\n"
         "Context:\nArtificial systems are created by people. These systems are designed to perform specific tasks, improve efficiency, and solve problems. Examples include knowledge systems, engineering systems, and social systems.\n\n"
         "Question: What is an Artificial System?\n"
-        "Answer: Artificial systems are systems created by humans to perform specific tasks, improve efficiency, and solve problems. They include systems such as knowledge systems, engineering systems, and social systems.\n\n"
         "### Now answer this\n"
         f"Context:\n{context}\n\n"
         f"Question: {query}\n"
-        "Answer:\nPlease answer ONLY based on the context above without adding extra information."
     )
-    result = qa_pipeline(prompt, max_length=700, do_sample=False)[0]['generated_text']
     return result.strip()
 # ===============================
@@ -156,3 +156,4 @@ search_interface = gr.Interface(
 app = gr.TabbedInterface([upload_interface, search_interface], ["Upload", "Ask"])
 app.launch()

 import torch
 # ===============================
+# EMBEDDING MODEL
 # ===============================
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
     inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
     with torch.no_grad():
         outputs = embedding_model(**inputs)
+    return outputs.last_hidden_state[:, 0].cpu().numpy()
 # ===============================
 # TEXT CHUNKING
 # ===============================
+def chunk_text(text, chunk_size=800, overlap=100):
     chunks = []
     start = 0
     while start < len(text):
 index_path = "faiss_index.pkl"
 document_texts_path = "document_texts.pkl"
 document_texts = []
+embedding_dim = 384
 if os.path.exists(index_path) and os.path.exists(document_texts_path):
     try:
 # ===============================
 qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
+def generate_answer_from_file(query, top_k=10):
     if not document_texts:
         return "No documents indexed yet."
     retrieved_chunks = [document_texts[i] for i in indices[0]]
     context = "\n\n".join(retrieved_chunks)
+    print("\n--- Retrieved Context ---\n", context)  # Debugging print
+    # Prompt Engineering
     prompt = (
+        "You are a helpful assistant reading student notes or textbook passages.\n\n"
+        "Based on the context provided, answer the question accurately and clearly.\n\n"
         "### Example\n"
         "Context:\nArtificial systems are created by people. These systems are designed to perform specific tasks, improve efficiency, and solve problems. Examples include knowledge systems, engineering systems, and social systems.\n\n"
         "Question: What is an Artificial System?\n"
+        "Answer: Artificial systems are systems created by humans to perform specific tasks, improve efficiency, and solve problems. They include systems like knowledge systems, engineering systems, and social systems.\n\n"
         "### Now answer this\n"
         f"Context:\n{context}\n\n"
         f"Question: {query}\n"
+        f"Answer:"
     )
+    result = qa_pipeline(prompt, max_length=512, do_sample=False)[0]['generated_text']
     return result.strip()
 # ===============================
 app = gr.TabbedInterface([upload_interface, search_interface], ["Upload", "Ask"])
 app.launch()