Update app.py

app.py CHANGED
@@ -39,18 +39,14 @@ retriever_tokenizer = AutoTokenizer.from_pretrained(retriever_model_name)
 hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
 
 # Load or create FAISS index
-index_path = "faiss_index.pkl"
+index_path = "faiss_index.index"
 if os.path.exists(index_path):
-    with open(index_path, "rb") as f:
-        faiss_index = pickle.load(f)
-    print("Loaded FAISS index from faiss_index.pkl")
+    faiss_index = faiss.read_index(index_path)
+    print("Loaded FAISS index from faiss_index.index")
 else:
     # Create a new FAISS index
     d = embedding_model.get_sentence_embedding_dimension()  # Dimension of the embeddings
-    nlist = 100  # Number of clusters for IVF
-    quantizer = faiss.IndexFlatL2(d)  # This is the quantizer for IVF
-    faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
-    faiss_index.train(np.zeros((nlist, d)).astype(np.float32))  # Train the index with dummy data
+    faiss_index = faiss.IndexFlatL2(d)  # Using IndexFlatL2 for simplicity
 
 def extract_text_from_pdf(pdf_path):
     text = ""
@@ -93,8 +89,7 @@ def upload_files(files):
             sentences = preprocess_text(text)
             embeddings = embedding_model.encode(sentences)
 
-            for embedding in embeddings:
-                faiss_index.add(np.array([embedding]).astype(np.float32))  # Add each embedding individually
+            faiss_index.add(np.array(embeddings).astype(np.float32))  # Add embeddings
 
         except Exception as e:
             print(f"Error processing file '{file.name}': {e}")
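Batching the add() call is the right move: IndexFlatL2.add accepts an (n, d) float32 array, so one call replaces the per-vector loop. What retrieval later depends on is that row i of the index lines up with state["sentences"][i], so the sentence store must grow in the same order as the index. A sketch of that invariant (all_sentences is a hypothetical stand-in for the state the app keeps):

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
faiss_index = faiss.IndexFlatL2(embedding_model.get_sentence_embedding_dimension())
all_sentences = []  # hypothetical store; must stay aligned with the index

def add_document(sentences):
    embeddings = embedding_model.encode(sentences)  # returns an (n, d) array
    faiss_index.add(np.array(embeddings).astype(np.float32))
    all_sentences.extend(sentences)  # row i in the index == all_sentences[i]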
@@ -118,7 +113,21 @@ def process_and_query(state, files, question):
     if question:
         question_embedding = embedding_model.encode([question])
 
-        # Perform FAISS search
+        # Perform FAISS search
+        D, I = faiss_index.search(np.array(question_embedding).astype(np.float32), k=5)
+        retrieved_results = [state["sentences"][i] for i in I[0]]
+
+        # Generate response based on retrieved results
+        combined_input = question + " ".join(retrieved_results)
+        inputs = generator_tokenizer(combined_input, return_tensors="pt")
+        with torch.no_grad():
+            generator_outputs = generator.generate(**inputs)
+        generated_text = generator_tokenizer.decode(generator_outputs[0], skip_special_tokens=True)
+
+        # Update conversation history
+        state["conversation"].append({"question": question, "answer": generated_text})
+
+        return {"message": generated_text, "conversation": state["conversation"]}
 
     return {"error": "No question provided"}
 
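One caveat on the new search block: faiss_index.search always returns k columns, and when fewer than k vectors have been indexed the padding slots come back with id -1. Indexing state["sentences"][i] with -1 silently wraps to the last sentence of a Python list rather than failing. A defensive variant, continuing the sketch above:

question_embedding = embedding_model.encode(["What does the document say?"])
D, I = faiss_index.search(np.array(question_embedding).astype(np.float32), 5)

valid_ids = [i for i in I[0] if i != -1]  # drop FAISS's padding ids
retrieved_results = [all_sentences[i] for i in valid_ids]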
@@ -139,3 +148,4 @@ with gr.Blocks() as demo:
     query_button.click(fn=process_and_query, inputs=[query], outputs=query_output)
 
 demo.launch()
+
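A wiring detail the diff leaves as-is: process_and_query is declared with (state, files, question), but the button passes inputs=[query] only, so Gradio would call the handler with a single argument. A sketch of click wiring that matches the signature (component names are assumptions, and the handler is stubbed):

import gradio as gr

def process_and_query(state, files, question):
    # stub standing in for the handler defined in app.py
    return {"message": f"received {question!r}", "conversation": state["conversation"]}

with gr.Blocks() as demo:
    state = gr.State({"conversation": [], "sentences": []})
    files = gr.File(file_count="multiple", label="Documents")
    query = gr.Textbox(label="Question")
    query_output = gr.JSON(label="Answer")
    query_button = gr.Button("Ask")

    # pass every parameter the handler declares, in declaration order
    query_button.click(fn=process_and_query, inputs=[state, files, query], outputs=query_output)

demo.launch()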