shukdevdatta123 committed on
Commit
7a25c1c
·
verified ·
1 Parent(s): 42f1e3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -3,6 +3,7 @@ import PyPDF2
3
  import openai
4
  import faiss
5
  import os
 
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
  from sklearn.metrics.pairwise import cosine_similarity
8
 
@@ -21,7 +22,7 @@ def get_embeddings(text, model="text-embedding-ada-002"):
21
 
22
  # Function to search for similar content
23
  def search_similar(query_embedding, index, stored_texts, top_k=3):
24
- distances, indices = index.search([query_embedding], top_k)
25
  results = [(stored_texts[i], distances[0][idx]) for idx, i in enumerate(indices[0])]
26
  return results
27
 
@@ -55,9 +56,12 @@ if openai_api_key:
55
  # Generate embeddings for all chunks
56
  embeddings = [get_embeddings(chunk) for chunk in chunks]
57
 
 
 
 
58
  # Create a FAISS index for similarity search
59
- index = faiss.IndexFlatL2(len(embeddings[0]))
60
- index.add(embeddings)
61
 
62
  st.write("Course materials have been processed and indexed.")
63
 
 
3
  import openai
4
  import faiss
5
  import os
6
+ import numpy as np
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
 
 
22
 
23
  # Function to search for similar content
24
  def search_similar(query_embedding, index, stored_texts, top_k=3):
25
+ distances, indices = index.search(np.array([query_embedding]), top_k)
26
  results = [(stored_texts[i], distances[0][idx]) for idx, i in enumerate(indices[0])]
27
  return results
28
 
 
56
  # Generate embeddings for all chunks
57
  embeddings = [get_embeddings(chunk) for chunk in chunks]
58
 
59
+ # Convert the list of embeddings into a NumPy array (shape: [num_chunks, embedding_size])
60
+ embeddings_np = np.array(embeddings).astype("float32")
61
+
62
  # Create a FAISS index for similarity search
63
+ index = faiss.IndexFlatL2(len(embeddings_np[0])) # Use the length of the embedding vectors for the dimension
64
+ index.add(embeddings_np)
65
 
66
  st.write("Course materials have been processed and indexed.")
67