NaimaAqeel committed on
Commit ec0cc7d · verified · 1 Parent(s): 8c85ad8

Update app.py

Files changed (1): app.py +21 -11
app.py CHANGED
@@ -39,18 +39,14 @@ retriever_tokenizer = AutoTokenizer.from_pretrained(retriever_model_name)
 hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

 # Load or create FAISS index
-index_path = "faiss_index.pkl"
+index_path = "faiss_index.index"
 if os.path.exists(index_path):
-    with open(index_path, "rb") as f:
-        faiss_index = faiss.read_index(f)
-    print("Loaded FAISS index from faiss_index.pkl")
+    faiss_index = faiss.read_index(index_path)
+    print("Loaded FAISS index from faiss_index.index")
 else:
     # Create a new FAISS index
     d = embedding_model.get_sentence_embedding_dimension()  # Dimension of the embeddings
-    nlist = 100  # Number of clusters (for IVF)
-    quantizer = faiss.IndexFlatL2(d)  # This is the quantizer for IVF
-    faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
-    faiss_index.train(np.zeros((nlist, d)).astype(np.float32))  # Train the index with dummy data
+    faiss_index = faiss.IndexFlatL2(d)  # Using IndexFlatL2 for simplicity

 def extract_text_from_pdf(pdf_path):
     text = ""
@@ -93,8 +89,7 @@ def upload_files(files):
         sentences = preprocess_text(text)
         embeddings = embedding_model.encode(sentences)

-        for embedding in embeddings:
-            faiss_index.add(np.array([embedding]).astype(np.float32))  # Add each embedding individually
+        faiss_index.add(np.array(embeddings).astype(np.float32))  # Add embeddings

     except Exception as e:
         print(f"Error processing file '{file.name}': {e}")
@@ -118,7 +113,21 @@ def process_and_query(state, files, question):
     if question:
         question_embedding = embedding_model.encode([question])

-        # Perform FAISS search and generate response as before
+        # Perform FAISS search
+        D, I = faiss_index.search(np.array(question_embedding).astype(np.float32), k=5)
+        retrieved_results = [state["sentences"][i] for i in I[0]]
+
+        # Generate response based on retrieved results
+        combined_input = question + " ".join(retrieved_results)
+        inputs = generator_tokenizer(combined_input, return_tensors="pt")
+        with torch.no_grad():
+            generator_outputs = generator.generate(**inputs)
+        generated_text = generator_tokenizer.decode(generator_outputs[0], skip_special_tokens=True)
+
+        # Update conversation history
+        state["conversation"].append({"question": question, "answer": generated_text})
+
+        return {"message": generated_text, "conversation": state["conversation"]}

     return {"error": "No question provided"}
@@ -139,3 +148,4 @@ with gr.Blocks() as demo:
     query_button.click(fn=process_and_query, inputs=[query], outputs=query_output)

 demo.launch()
+
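A loose end this commit does not touch: process_and_query is defined as (state, files, question), but the click handler only wires inputs=[query]. A sketch of the usual gr.State wiring to match the signature (the state initializer and the file_upload component name are assumptions; the app's actual components are not shown in this diff):

    state = gr.State({"conversation": [], "sentences": []})
    query_button.click(
        fn=process_and_query,
        inputs=[state, file_upload, query],  # mirror (state, files, question)
        outputs=query_output,
    )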
 
 