Update app.py

app.py CHANGED
@@ -39,18 +39,14 @@ retriever_tokenizer = AutoTokenizer.from_pretrained(retriever_model_name)
 hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
 
 # Load or create FAISS index
-index_path = "faiss_index.pkl"
+index_path = "faiss_index.index"
 if os.path.exists(index_path):
-    with open(index_path, "rb") as f:
-        faiss_index = pickle.load(f)
-    print("Loaded FAISS index from faiss_index.pkl")
+    faiss_index = faiss.read_index(index_path)
+    print("Loaded FAISS index from faiss_index.index")
 else:
     # Create a new FAISS index
     d = embedding_model.get_sentence_embedding_dimension()  # Dimension of the embeddings
-    nlist = 100  # Number of clusters for IVF
-    quantizer = faiss.IndexFlatL2(d)  # This is the quantizer for IVF
-    faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
-    faiss_index.train(np.zeros((nlist, d)).astype(np.float32))  # Train the index with dummy data
+    faiss_index = faiss.IndexFlatL2(d)  # Using IndexFlatL2 for simplicity
 
 def extract_text_from_pdf(pdf_path):
     text = ""
@@ -93,8 +89,7 @@ def upload_files(files):
             sentences = preprocess_text(text)
             embeddings = embedding_model.encode(sentences)
 
-            for embedding in embeddings:
-                faiss_index.add(np.array([embedding]).astype(np.float32))  # Add each embedding individually
+            faiss_index.add(np.array(embeddings).astype(np.float32))  # Add embeddings
 
         except Exception as e:
             print(f"Error processing file '{file.name}': {e}")
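Batching the add() call is the right move: IndexFlatL2.add accepts an (n, d) float32 array, so one call replaces the per-vector loop. What retrieval later depends on is that row i of the index lines up with state["sentences"][i], so the sentence store must grow in the same order as the index. A sketch of that invariant (all_sentences is a hypothetical stand-in for the state the app keeps):

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
faiss_index = faiss.IndexFlatL2(embedding_model.get_sentence_embedding_dimension())
all_sentences = []  # hypothetical store; must stay aligned with the index

def add_document(sentences):
    embeddings = embedding_model.encode(sentences)  # returns an (n, d) array
    faiss_index.add(np.array(embeddings).astype(np.float32))
    all_sentences.extend(sentences)  # row i in the index == all_sentences[i]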
@@ -118,7 +113,21 @@ def process_and_query(state, files, question):
     if question:
         question_embedding = embedding_model.encode([question])
 
-        # Perform FAISS search
+        # Perform FAISS search
+        D, I = faiss_index.search(np.array(question_embedding).astype(np.float32), k=5)
+        retrieved_results = [state["sentences"][i] for i in I[0]]
+
+        # Generate response based on retrieved results
+        combined_input = question + " ".join(retrieved_results)
+        inputs = generator_tokenizer(combined_input, return_tensors="pt")
+        with torch.no_grad():
+            generator_outputs = generator.generate(**inputs)
+        generated_text = generator_tokenizer.decode(generator_outputs[0], skip_special_tokens=True)
+
+        # Update conversation history
+        state["conversation"].append({"question": question, "answer": generated_text})
+
+        return {"message": generated_text, "conversation": state["conversation"]}
 
     return {"error": "No question provided"}
 
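One caveat on the new search block: faiss_index.search always returns k columns, and when fewer than k vectors have been indexed the padding slots come back with id -1. Indexing state["sentences"][i] with -1 silently wraps to the last sentence of a Python list rather than failing. A defensive variant, continuing the sketch above:

question_embedding = embedding_model.encode(["What does the document say?"])
D, I = faiss_index.search(np.array(question_embedding).astype(np.float32), 5)

valid_ids = [i for i in I[0] if i != -1]  # drop FAISS's padding ids
retrieved_results = [all_sentences[i] for i in valid_ids]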
@@ -139,3 +148,4 @@ with gr.Blocks() as demo:
     query_button.click(fn=process_and_query, inputs=[query], outputs=query_output)
 
 demo.launch()
+
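A wiring detail the diff leaves as-is: process_and_query is declared with (state, files, question), but the button passes inputs=[query] only, so Gradio would call the handler with a single argument. A sketch of click wiring that matches the signature (component names are assumptions, and the handler is stubbed):

import gradio as gr

def process_and_query(state, files, question):
    # stub standing in for the handler defined in app.py
    return {"message": f"received {question!r}", "conversation": state["conversation"]}

with gr.Blocks() as demo:
    state = gr.State({"conversation": [], "sentences": []})
    files = gr.File(file_count="multiple", label="Documents")
    query = gr.Textbox(label="Question")
    query_output = gr.JSON(label="Answer")
    query_button = gr.Button("Ask")

    # pass every parameter the handler declares, in declaration order
    query_button.click(fn=process_and_query, inputs=[state, files, query], outputs=query_output)

demo.launch()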