Spaces:

NaimaAqeel
/

Chatbot

Runtime error

App Files Community

NaimaAqeel committed on Apr 19

Commit

145a282

verified ·

1 Parent(s): dc15ddb

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -73

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import os
-import sys
 import pickle
 import numpy as np
 import gradio as gr
@@ -7,9 +6,10 @@ import fitz  # PyMuPDF
 from docx import Document
 from transformers import AutoModel, AutoTokenizer
 import faiss
 # =============================================
-# EMBEDDING MODEL SETUP (NO sentence-transformers dependency)
 # =============================================
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -62,74 +62,4 @@ def extract_text_from_docx(docx_path):
         doc = Document(docx_path)
         text = "\n".join([para.text for para in doc.paragraphs])
     except Exception as e:
-        print(f"DOCX error: {e}")
-    return text
-# =============================================
-# CORE FUNCTIONALITY
-# =============================================
-def upload_files(files):
-    global index, document_texts
-    try:
-        for file in files:
-            file_path = file.name
-            if file_path.endswith('.pdf'):
-                text = extract_text_from_pdf(file_path)
-            elif file_path.endswith('.docx'):
-                text = extract_text_from_docx(file_path)
-            else:
-                continue
-            sentences = [s.strip() for s in text.split("\n") if s.strip()]
-            if not sentences:
-                continue
-            embeddings = get_embeddings(sentences)
-            index.add(embeddings)
-            document_texts.extend(sentences)
-        # Save updated index
-        with open(index_path, "wb") as f:
-            pickle.dump(index, f)
-        with open(document_texts_path, "wb") as f:
-            pickle.dump(document_texts, f)
-        return f"Processed {len(files)} files, added {len(sentences)} sentences"
-    except Exception as e:
-        return f"Error: {str(e)}"
-def query_text(query):
-    try:
-        query_embedding = get_embeddings(query)
-        D, I = index.search(query_embedding, k=3)
-        results = []
-        for idx in I[0]:
-            if 0 <= idx < len(document_texts):
-                results.append(document_texts[idx])
-        return "\n\n---\n\n".join(results) if results else "No matches found"
-    except Exception as e:
-        return f"Query error: {str(e)}"
-# =============================================
-# GRADIO INTERFACE
-# =============================================
-with gr.Blocks() as demo:
-    gr.Markdown("## Document Search with Semantic Similarity")
-    with gr.Tab("Upload Documents"):
-        file_input = gr.File(file_count="multiple", file_types=[".pdf", ".docx"])
-        upload_btn = gr.Button("Process Files")
-        upload_output = gr.Textbox()
-    with gr.Tab("Search"):
-        query_input = gr.Textbox(label="Enter your query")
-        search_btn = gr.Button("Search")
-        results_output = gr.Textbox()
-    upload_btn.click(upload_files, inputs=file_input, outputs=upload_output)
-    search_btn.click(query_text, inputs=query_input, outputs=results_output)
-if __name__ == "__main__":
-    demo.launch()

 import os
 import pickle
 import numpy as np
 import gradio as gr
 from docx import Document
 from transformers import AutoModel, AutoTokenizer
 import faiss
+import torch
 # =============================================
+# EMBEDDING MODEL SETUP
 # =============================================
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
         doc = Document(docx_path)
         text = "\n".join([para.text for para in doc.paragraphs])
     except Exception as e:
+        print