AzizTh committed on
Commit
8f70b69
·
verified ·
1 Parent(s): b2b6fff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -20
app.py CHANGED
@@ -1,40 +1,44 @@
 
1
  import pandas as pd
2
  import numpy as np
3
  import faiss
4
  import gradio as gr
5
  from sentence_transformers import SentenceTransformer
6
 
7
- # Load the CSV file with embeddings
8
- csv_path = 'df_after_rec_embedding.csv' # Update with your actual CSV path
9
- df = pd.read_csv(csv_path)
10
- data = df.to_numpy().astype('float32')
 
 
 
 
11
 
12
  # Create a FAISS index
13
- dimension = data.shape[1]
14
  index = faiss.IndexFlatL2(dimension) # L2 distance metric
15
- index.add(data) # Add data to the index
16
 
17
  # Load the nomic-ai/nomic-embed-text-v1 model
18
- model = SentenceTransformer('nomic-ai/nomic-embed-text-v1',device='cpu', trust_remote_code=True)
19
-
20
 
21
  # Function to embed query and search using FAISS
22
  def search(query):
23
  # Embed the query using the model
24
  query_vector = model.encode([query])[0].astype('float32')
25
-
26
  # Search the FAISS index
27
- distances, indices = index.search(np.array([query_vector]), k=1) # Search for top 5 closest vectors
28
-
29
- # Return results with indices and distances
30
- return [f"Index: {i}, Distance: {d:.4f}" for i, d in zip(indices[0], distances[0])]
31
-
32
 
33
  # Create the Gradio interface
34
  def gradio_app():
35
  with gr.Blocks() as demo:
36
  gr.Markdown("## FAISS Search Interface with Nomic Embedder")
37
-
38
  with gr.Row():
39
  with gr.Column():
40
  query_input = gr.Textbox(
@@ -42,19 +46,18 @@ def gradio_app():
42
  placeholder="Type your search query here"
43
  )
44
  search_button = gr.Button("Search")
45
-
46
  with gr.Column():
47
- search_results = gr.Textbox(label="Search Results")
48
-
49
  search_button.click(
50
  fn=search,
51
  inputs=[query_input],
52
  outputs=[search_results]
53
  )
54
-
55
  return demo
56
 
57
-
58
  # Launch the Gradio app
59
  demo = gradio_app()
60
  demo.launch()
 
1
+
2
  import pandas as pd
3
  import numpy as np
4
  import faiss
5
  import gradio as gr
6
  from sentence_transformers import SentenceTransformer
7
 
8
+ # Load the embeddings from the embeddings CSV file
9
+ embeddings_csv_path = 'df_after_rec_embedding.csv' # Path to the embeddings CSV
10
+ embeddings = pd.read_csv(embeddings_csv_path).to_numpy().astype('float32')
11
+
12
+ # Load the content from the content CSV file
13
+ content_csv_path = 'content.csv' # Path to the content CSV
14
+ content_df = pd.read_csv(content_csv_path) # Load the entire content DataFrame
15
+ content_column = content_df['2'].tolist() # Extract the content from column '2'
16
 
17
  # Create a FAISS index
18
+ dimension = embeddings.shape[1]
19
  index = faiss.IndexFlatL2(dimension) # L2 distance metric
20
+ index.add(embeddings) # Add embeddings to the index
21
 
22
  # Load the nomic-ai/nomic-embed-text-v1 model
23
+ model = SentenceTransformer('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)
 
24
 
25
  # Function to embed query and search using FAISS
def search(query):
    """Return the stored content entries closest to *query*.

    The query is embedded with the module-level ``model``, the module-level
    FAISS ``index`` is asked for the five nearest vectors, and every real hit
    is rendered as a "Content: ..." line followed by a "Distance: ..." line.
    Hits are separated by a blank line.

    Args:
        query: Free-text search string typed into the UI.

    Returns:
        One string containing a paragraph per hit. The string is empty when
        the query is blank or the index returns no real neighbours.

    Raises:
        IndexError: If the index returns a row number that has no matching
            entry in ``content_column`` (embeddings and content out of sync).
    """
    # A blank query carries no information to embed; skip the model call.
    if query is None or not str(query).strip():
        return ""

    # Embed the query; FAISS wants a 2-D float32 array of shape (1, dim).
    query_vector = np.asarray(model.encode([query])[0], dtype='float32')

    # Search the FAISS index for the top 5 closest vectors.
    distances, indices = index.search(query_vector.reshape(1, -1), k=5)

    # FAISS pads the result with index -1 when fewer than k vectors exist.
    # Without the filter, content_column[-1] would silently return the last
    # row as if it were a genuine match.
    results = [
        f"Content: {content_column[i]}\nDistance: {d:.4f}"
        for i, d in zip(indices[0], distances[0])
        if i >= 0
    ]
    return "\n\n".join(results)
36
 
37
  # Create the Gradio interface
38
  def gradio_app():
39
  with gr.Blocks() as demo:
40
  gr.Markdown("## FAISS Search Interface with Nomic Embedder")
41
+
42
  with gr.Row():
43
  with gr.Column():
44
  query_input = gr.Textbox(
 
46
  placeholder="Type your search query here"
47
  )
48
  search_button = gr.Button("Search")
49
+
50
  with gr.Column():
51
+ search_results = gr.Textbox(label="Search Results", lines=10)
52
+
53
  search_button.click(
54
  fn=search,
55
  inputs=[query_input],
56
  outputs=[search_results]
57
  )
58
+
59
  return demo
60
 
 
61
  # Launch the Gradio app
62
  demo = gradio_app()
63
  demo.launch()