# AzizTh's picture
# Update app.py
# a9f61e8 verified
import pandas as pd
import numpy as np
import faiss
import gradio as gr
from sentence_transformers import SentenceTransformer
# Load the precomputed document embeddings from the embeddings CSV file.
embeddings_csv_path = 'df_after_rec_embedding.csv'  # Path to the embeddings CSV
# pandas can hand back a column-major array for a single-dtype frame, and
# older FAISS Python wrappers reject anything that is not C-contiguous
# float32, so force that layout explicitly instead of relying on astype().
embeddings = np.ascontiguousarray(
    pd.read_csv(embeddings_csv_path).to_numpy(), dtype='float32'
)

# Load the content from the content CSV file.
content_csv_path = 'content.csv'  # Path to the content CSV
content_df = pd.read_csv(content_csv_path)  # Load the entire content DataFrame
content_column = content_df['2'].tolist()  # Extract the content from column '2'
# NOTE(review): search() looks up content_column by FAISS row id, so this list
# presumably has one entry per embedding row, in the same order -- confirm.

# Create a flat FAISS index over every embedding row.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # L2 distance metric
index.add(embeddings)  # Add embeddings to the index

# Load the nomic-ai/nomic-embed-text-v1 model used to embed incoming queries.
# trust_remote_code=True lets the model repository's own Python code run.
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)
# Function to embed query and search using FAISS
def search(query, top_k=1):
    """Embed *query* and return the closest stored content as display text.

    Args:
        query: Free-text search string.
        top_k: Number of nearest neighbours to return. Defaults to 1, the
            value that was previously hard-coded.

    Returns:
        One "Content: <text>" / "Distance: <value>" entry per hit, with
        entries separated by a blank line. The distance is the value reported
        by the FAISS index (squared L2 for ``IndexFlatL2``). The string is
        empty when the index returns no valid hit.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    # NOTE(review): the nomic-embed-text-v1 model card asks for a task prefix
    # such as "search_query: " on inputs. No prefix is added here because the
    # stored embeddings may have been built without one -- confirm and align.
    query_vector = model.encode([query])[0].astype('float32')

    # FAISS expects a 2-D (n_queries, dimension) float32 array.
    distances, indices = index.search(query_vector.reshape(1, -1), k=top_k)

    # FAISS pads the id array with -1 when it finds fewer than top_k
    # neighbours; skip those so -1 is never used as a Python list index
    # (which would silently return the last document).
    results = [
        f"Content: {content_column[i]}\nDistance: {d:.4f}"
        for i, d in zip(indices[0], distances[0])
        if i >= 0
    ]
    return "\n\n".join(results)
# Create the Gradio interface
def gradio_app():
    """Assemble the search UI and return the ``gr.Blocks`` app (not launched).

    The page has a heading followed by a single row with two columns: the
    query textbox and "Search" button in the first, the results textbox in
    the second. Clicking the button feeds the query text to ``search`` and
    writes its return value into the results textbox.
    """
    with gr.Blocks() as app:
        gr.Markdown("## FAISS Search Interface with Nomic Embedder")

        with gr.Row():
            # Input side: query text and the button that triggers the search.
            with gr.Column():
                query_box = gr.Textbox(label="Search Query", placeholder="Type your search query here")
                run_button = gr.Button("Search")

            # Output side: plain-text results.
            with gr.Column():
                results_box = gr.Textbox(label="Search Results", lines=10)

        # Event wiring has to happen while the Blocks context is still open.
        run_button.click(fn=search, inputs=[query_box], outputs=[results_box])

    return app
# Launch the Gradio app
# Build the Blocks UI once at module load; `demo` remains a module-level name.
demo = gradio_app()
# launch() is called with no arguments, so Gradio's default settings apply.
demo.launch()