NaimaAqeel committed
Commit 9502a66 (verified) · Parent: d00c686

Update app.py

Files changed (1): app.py (+34, -28)
app.py CHANGED
@@ -8,9 +8,9 @@ from transformers import AutoModel, AutoTokenizer, pipeline
 import faiss
 import torch
 
-# =============================================
-# EMBEDDING MODEL SETUP
-# =============================================
+# ===============================
+# EMBEDDING MODEL
+# ===============================
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 embedding_model = AutoModel.from_pretrained(model_name)
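A note on this hunk: `get_embeddings` (unchanged, visible in the next hunk's context) pools with the CLS token (`last_hidden_state[:, 0]`), but all-MiniLM-L6-v2 was trained for mean pooling with L2 normalization, and the `IndexFlatIP` created later only behaves as cosine similarity on unit-length vectors. A minimal sketch of that pooling, reusing the `tokenizer` and `embedding_model` from this hunk (the function name is hypothetical):

```python
import torch
import torch.nn.functional as F

def get_embeddings_mean_pooled(texts):
    # Hypothetical alternative to the app's CLS-token get_embeddings.
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = embedding_model(**inputs)
    # Average token embeddings, masking out padding positions.
    mask = inputs["attention_mask"].unsqueeze(-1).float()
    mean = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
    # Unit-normalize so IndexFlatIP's inner product equals cosine similarity.
    return F.normalize(mean, p=2, dim=1).cpu().numpy()
```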
@@ -23,9 +23,9 @@ def get_embeddings(texts):
     outputs = embedding_model(**inputs)
     return outputs.last_hidden_state[:, 0].cpu().numpy()
 
-# =============================================
+# ===============================
 # TEXT CHUNKING
-# =============================================
+# ===============================
 def chunk_text(text, chunk_size=500, overlap=50):
     chunks = []
     start = 0
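The chunker's loop body falls between hunks, but the `start += chunk_size - overlap` context in the next hunk shows it is a fixed-size sliding window over characters. Assuming the elided loop is `while start < len(text)`, the window arithmetic works out like this:

```python
# 1200 characters, 500-char windows, 50-char overlap:
# windows start at 0, 450, 900 -> chunks of 500, 500, and 300 characters.
chunks = chunk_text("x" * 1200, chunk_size=500, overlap=50)
print([len(c) for c in chunks])  # [500, 500, 300] under the assumption above
```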
@@ -35,14 +35,14 @@ def chunk_text(text, chunk_size=500, overlap=50):
         start += chunk_size - overlap
     return chunks
 
-# =============================================
+# ===============================
 # FAISS INDEX SETUP
-# =============================================
+# ===============================
 index_path = "faiss_index.pkl"
 document_texts_path = "document_texts.pkl"
 document_texts = []
-
-embedding_dim = 384 # for all-MiniLM-L6-v2
+embedding_dim = 384
+
 if os.path.exists(index_path) and os.path.exists(document_texts_path):
     try:
         with open(index_path, "rb") as f:
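A caveat on this hunk: `faiss_index.pkl` implies the index is persisted with `pickle`, but `faiss.Index` wraps a C++ object and plain pickling is not dependable across faiss versions. faiss ships native serializers; a sketch of safer persistence (the `faiss_index.bin` filename is invented for illustration):

```python
import pickle
import faiss

def save_state(index, texts):
    # faiss's own on-disk format; works where pickle may not.
    faiss.write_index(index, "faiss_index.bin")
    with open(document_texts_path, "wb") as f:
        pickle.dump(texts, f)  # a plain list of strings pickles fine

def load_state():
    index = faiss.read_index("faiss_index.bin")
    with open(document_texts_path, "rb") as f:
        texts = pickle.load(f)
    return index, texts
```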
@@ -55,9 +55,9 @@ if os.path.exists(index_path) and os.path.exists(document_texts_path):
 else:
     index = faiss.IndexFlatIP(embedding_dim)
 
-# =============================================
-# DOCUMENT PROCESSING
-# =============================================
+# ===============================
+# FILE EXTRACTORS
+# ===============================
 def extract_text_from_pdf(path):
     text = ""
     try:
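The body of `extract_text_from_pdf` is elided here, and the diff never shows which PDF library the app imports. For orientation only, a typical implementation with pypdf (an assumption, not the file's code) follows the same shape as the DOCX extractor in the next hunk:

```python
from pypdf import PdfReader

def extract_text_from_pdf_sketch(path):
    # Hypothetical stand-in for the elided extractor body.
    text = ""
    try:
        for page in PdfReader(path).pages:
            text += page.extract_text() or ""
    except Exception as e:
        print(f"PDF error: {e}")
    return text
```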
@@ -77,9 +77,9 @@ def extract_text_from_docx(path):
         print(f"DOCX error: {e}")
     return text
 
-# =============================================
-# UPLOAD AND INDEX FILE
-# =============================================
+# ===============================
+# UPLOAD HANDLER
+# ===============================
 def upload_document(file):
     ext = os.path.splitext(file.name)[-1].lower()
     if ext == ".pdf":
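The middle of `upload_document` also falls between hunks, but the surrounding context (the extractors above, the module-level `index` and `document_texts`, and the success message in the next hunk) implies an extract, chunk, embed, add flow. A sketch of that shape, not the file's exact code:

```python
# Inside upload_document, once the extension check has picked an extractor:
text = extract_text_from_pdf(file.name)    # or extract_text_from_docx(file.name)
chunks = chunk_text(text)
index.add(get_embeddings(chunks))          # vectors join the FAISS index
document_texts.extend(chunks)              # raw chunks kept for retrieval
```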
@@ -101,12 +101,12 @@ def upload_document(file):
 
     return "Document uploaded and indexed successfully."
 
-# =============================================
-# QA PIPELINE WITH FLAN-T5
-# =============================================
+# ===============================
+# GENERATION PIPELINE (FLAN-T5)
+# ===============================
 qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")
 
-def generate_answer_from_file(query, top_k=3):
+def generate_answer_from_file(query, top_k=5):
     if not document_texts:
         return "No documents indexed yet."
 
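The retrieval call itself is elided between this hunk and the next; the `indices[0]` context below implies a standard FAISS lookup. A sketch of that step (again an assumption, not the file's code):

```python
# Embed the query and fetch the top_k inner-product neighbours.
query_vec = get_embeddings([query])
scores, indices = index.search(query_vec, top_k)
```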
@@ -115,27 +115,33 @@ def generate_answer_from_file(query, top_k=3):
     retrieved_chunks = [document_texts[i] for i in indices[0]]
     context = " ".join(retrieved_chunks)
 
-    prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"
-    result = qa_pipeline(prompt, max_length=200)[0]['generated_text']
-    return result
+    prompt = (
+        f"Use the following context from a textbook or academic document to answer the question accurately and in detail.\n\n"
+        f"Context:\n{context}\n\n"
+        f"Question: {query}\n\n"
+        f"Answer:"
+    )
 
-# =============================================
-# GRADIO UI
-# =============================================
+    result = qa_pipeline(prompt, max_length=512, do_sample=False)[0]['generated_text']
+    return result.strip()
+
+# ===============================
+# GRADIO INTERFACES
+# ===============================
 upload_interface = gr.Interface(
     fn=upload_document,
     inputs=gr.File(file_types=[".pdf", ".docx"]),
     outputs="text",
     title="Upload Document",
-    description="Upload a Word or PDF file to index it for question answering."
+    description="Upload your Word or PDF document for question answering."
 )
 
 search_interface = gr.Interface(
     fn=generate_answer_from_file,
-    inputs=gr.Textbox(placeholder="Ask a question about the uploaded document..."),
+    inputs=gr.Textbox(placeholder="Ask your question about the uploaded document..."),
     outputs="text",
-    title="Ask Your Document",
-    description="Ask any question. The chatbot will read the document and answer like ChatGPT."
+    title="Ask the Document",
+    description="Ask questions about the uploaded content. The chatbot will answer based on the document."
 )
 
 app = gr.TabbedInterface([upload_interface, search_interface], ["Upload", "Ask"])
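A closing note on the generation change: `max_length=512` bounds the output, while flan-t5-base's encoder input is also effectively capped at 512 tokens, so a prompt built from five ~500-character chunks can be silently truncated before the question is seen. A hedged guard that clips the context rather than the prompt's tail (`clip_context` is a made-up helper using the pipeline's own tokenizer):

```python
def clip_context(context, query, limit=512):
    tok = qa_pipeline.tokenizer
    # Tokens consumed by everything except the context itself.
    frame = (
        "Use the following context from a textbook or academic document "
        f"to answer the question accurately and in detail.\n\nContext:\n\n\n"
        f"Question: {query}\n\nAnswer:"
    )
    budget = max(limit - len(tok(frame)["input_ids"]), 1)
    ids = tok(context, truncation=True, max_length=budget)["input_ids"]
    return tok.decode(ids, skip_special_tokens=True)
```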
 