"""Gradio front end for nearest-neighbour search over precomputed embeddings.

At import time this module:

* reads an embedding matrix from ``df_after_rec_embedding.csv``,
* reads the matching text content from column ``'2'`` of ``content.csv``,
* builds an in-memory FAISS ``IndexFlatL2`` over the embeddings, and
* loads the ``nomic-ai/nomic-embed-text-v1`` SentenceTransformer model.

Running the file as a script launches the Gradio UI.
"""

import faiss
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# Load the embeddings from the embeddings CSV file.
embeddings_csv_path = 'df_after_rec_embedding.csv'  # Path to the embeddings CSV
# FAISS works on float32 data; ``to_numpy()`` on a DataFrame does not promise
# a C-contiguous layout, so force one before handing the matrix to the index.
embeddings = np.ascontiguousarray(
    pd.read_csv(embeddings_csv_path).to_numpy(), dtype='float32'
)

# Load the content from the content CSV file.
content_csv_path = 'content.csv'  # Path to the content CSV
content_df = pd.read_csv(content_csv_path)  # Load the entire content DataFrame
# Row i of ``content_column`` is looked up with the FAISS id i, so it is
# presumably aligned row-for-row with ``embeddings`` -- TODO confirm.
content_column = content_df['2'].tolist()  # Extract the content from column '2'

# Create a FAISS index.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # L2 distance metric
index.add(embeddings)  # Add embeddings to the index

# Load the nomic-ai/nomic-embed-text-v1 model.
model = SentenceTransformer('nomic-ai/nomic-embed-text-v1', trust_remote_code=True)


def search(query: str, top_k: int = 1) -> str:
    """Embed *query* and return the closest stored content entries.

    Args:
        query: Free-text search query typed by the user.
        top_k: Maximum number of nearest neighbours to return. Defaults to 1,
            which is what the original code searched for.

    Returns:
        One ``"Content: ...\\nDistance: ..."`` entry per hit, separated by
        blank lines. A short message is returned instead when the query is
        empty or when the index yields no hits.
    """
    if not query or not query.strip():
        return "Please enter a search query."

    # Never ask FAISS for more neighbours than the index holds.
    k = min(int(top_k), index.ntotal)
    if k < 1:
        return "No results found."

    # NOTE(review): the nomic-embed-text-v1 model card describes task prefixes
    # (e.g. "search_query: " / "search_document: "). Confirm how the stored
    # embeddings were produced and whether the query needs a matching prefix.
    # Encoding a one-element list yields a (1, dimension) batch, which is the
    # shape ``index.search`` expects.
    query_matrix = np.ascontiguousarray(model.encode([query]), dtype='float32')

    # Search the FAISS index for the k closest vectors.
    distances, indices = index.search(query_matrix, k)

    # FAISS uses -1 as the id of a missing neighbour; skip those so that
    # ``content_column[-1]`` is never returned by accident.
    results = [
        f"Content: {content_column[i]}\nDistance: {d:.4f}"
        for i, d in zip(indices[0], distances[0])
        if i >= 0
    ]
    if not results:
        return "No results found."
    return "\n\n".join(results)


def gradio_app() -> gr.Blocks:
    """Build the Gradio Blocks UI: a query box and button wired to ``search``.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` application.
    """
    with gr.Blocks() as demo:
        gr.Markdown("## FAISS Search Interface with Nomic Embedder")

        with gr.Row():
            with gr.Column():
                query_input = gr.Textbox(
                    label="Search Query",
                    placeholder="Type your search query here",
                )
                search_button = gr.Button("Search")

            with gr.Column():
                search_results = gr.Textbox(label="Search Results", lines=10)

        # Only the query is passed in, so ``search`` runs with its default top_k.
        search_button.click(
            fn=search,
            inputs=[query_input],
            outputs=[search_results],
        )

    return demo


# Build the app at module level so ``demo`` stays available as a module
# attribute; only start the server when executed as a script.
demo = gradio_app()

if __name__ == "__main__":
    demo.launch()