Spaces:

NaimaAqeel
/

Chatbot

Runtime error

App Files Files Community

NaimaAqeel committed on Jun 4, 2024

Commit

f7133fb

verified ·

1 Parent(s): f812db9

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -43

app.py CHANGED Viewed

@@ -17,9 +17,17 @@ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 def extract_text_from_pdf(pdf_path):
     # ...
-# Function to extract text from a Word document (same as before)
 def extract_text_from_docx(docx_path):
-    # ...
 # Initialize the embedding model (same as before)
 embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -50,14 +58,19 @@ document_texts = []
 def preprocess_text(text):
-    # ... (text preprocessing logic, same as before)
 def upload_files(files):
     global index, document_texts
     try:
         for file_path in files:
-            # ... (file processing logic, same as before)
             # Preprocess text (call the new function)
             sentences = preprocess_text(text)
@@ -66,48 +79,11 @@ def upload_files(files):
             embeddings = embedding_model.encode(sentences)
             index.add(np.array(embeddings))
-        # Save the updated index and documents (same as before)
-        # ...
         return "Files processed successfully"
     except Exception as e:
-        print(f"Error processing files: {e}")
-        return f"Error processing files: {e}"
-def query_text(text):
-    try:
-        # Preprocess query text
-        query_sentences = preprocess_text(text)
-        query_embeddings = embedding_model.encode(query_sentences)
-        # Retrieve relevant documents using FAISS
-        D, I = index.search(np.array(query_embeddings), k=5)
-        retrieved_docs = [document_texts[idx] for idx in I[0] if idx != -1]
-        # Retriever-Augmented Generation (RAG)
-        retriever_inputs = retriever_tokenizer(
-            text=retrieved_docs, return_tensors="pt", padding=True
-        )
-        retriever_outputs = retriever(**retriever_inputs)
-        retrieved_texts = retriever_tokenizer.batch_decode(retriever_outputs.logits)
-        # Generate response using retrieved information (as prompts/context)
-        generator_inputs = generator_tokenizer(
-            text=[text] + retrieved_texts, return_tensors="pt", padding=True
-        )
-        generator_outputs = generator(**generator_inputs)
-        response = generator_tokenizer.decode(generator_outputs.sequences[0], skip_special_tokens=True)
-        return response
-    except Exception as e:
-        print(f"Error querying text: {e}")
-        return f"Error querying text: {e}"
-# Create Gradio interface
-with gr.Blocks() as demo:
-    # ... (rest of the Gradio interface definition)
-    query_button.click(fn=query_text, inputs

 def extract_text_from_pdf(pdf_path):
     # ...
+# Function to extract text from a Word document (fixed indentation)
 def extract_text_from_docx(docx_path):
+    """Extracts text from a Word document."""
+    text = ""
+    try:
+        doc = Document(docx_path)
+        text = "\n".join([para.text for para in doc.paragraphs])
+    except Exception as e:
+        print(f"Error extracting text from DOCX: {e}")
+    return text
 # Initialize the embedding model (same as before)
 embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
 def preprocess_text(text):
+    # ... (text preprocessing logic, e.g., sentence segmentation and optional stop word removal)
 def upload_files(files):
     global index, document_texts
     try:
         for file_path in files:
+            if file_path.endswith('.pdf'):
+                text = extract_text_from_pdf(file_path)
+            elif file_path.endswith('.docx'):
+                text = extract_text_from_docx(file_path)
+            else:
+                return "Unsupported file format"
             # Preprocess text (call the new function)
             sentences = preprocess_text(text)
             embeddings = embedding_model.encode(sentences)
             index.add(np.array(embeddings))
+        # Save the updated index and documents
         return "Files processed successfully"
     except Exception as e:
+        print(