|
|
|
import faiss
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
|
|
|
|
|
# CSV holding the precomputed embedding matrix.
# NOTE(review): assumes one embedding per row, in the same row order as the
# content CSV, since search results are mapped back by row position - confirm.
embeddings_csv_path = 'df_after_rec_embedding.csv'

# DataFrame.to_numpy() can return an array that is not C-contiguous, and
# astype() preserves that layout by default. FAISS expects a row-major float32
# matrix, so the layout and dtype are forced explicitly here.
embeddings = np.ascontiguousarray(
    pd.read_csv(embeddings_csv_path).to_numpy(),
    dtype=np.float32,
)
|
|
|
|
|
# CSV with the human-readable text that search results are displayed from.
content_csv_path = 'content.csv'
content_df = pd.read_csv(content_csv_path)

# The text lives in the column whose header is the string '2'; keep it as a
# plain Python list so hits can be looked up by row position.
content_column = content_df['2'].tolist()
|
|
|
|
|
# Width of each embedding vector (number of columns in the matrix).
dimension = embeddings.shape[1]

# Exact (brute-force) L2 index over all stored embeddings.
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
|
|
|
|
|
# Sentence embedder used to encode incoming queries. trust_remote_code=True
# lets the checkpoint's own modelling code be loaded and executed.
model = SentenceTransformer(
    'nomic-ai/nomic-embed-text-v1',
    trust_remote_code=True,
)
|
|
|
|
|
def search(query, k=1):
    """Return the stored content entries nearest to *query*.

    The query is embedded with the module-level ``model``, looked up in the
    module-level FAISS ``index``, and every hit is mapped back to its text
    through ``content_column`` by row position.

    Args:
        query: Free-text search string.
        k: Number of nearest neighbours to return. Defaults to 1, which is
            the value that used to be hard-coded.

    Returns:
        A string with one "Content: ... / Distance: ..." entry per hit,
        entries separated by a blank line. An empty string when the query is
        blank or the index yields no valid hit.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # A blank query has no meaningful neighbours; skip the model entirely.
    if not query or not query.strip():
        return ""
    if k < 1:
        raise ValueError("k must be a positive integer")

    # NOTE(review): the nomic-embed-text-v1 model card describes task prefixes
    # such as "search_query: ". None is added here because the query must be
    # encoded the same way the stored embeddings were - confirm upstream.
    # Encoding a one-element list already yields the (1, dim) batch that
    # index.search expects, so no unwrap/re-wrap is needed.
    query_batch = np.ascontiguousarray(
        model.encode([query]), dtype=np.float32
    ).reshape(1, -1)

    distances, indices = index.search(query_batch, k=k)

    # FAISS pads the result with label -1 when fewer than k vectors are
    # available; indexing the list with -1 would silently return the last
    # row, so those slots are skipped. IndexFlatL2 reports squared L2 values.
    results = [
        f"Content: {content_column[i]}\nDistance: {d:.4f}"
        for i, d in zip(indices[0], distances[0])
        if i >= 0
    ]
    return "\n\n".join(results)
|
|
|
|
|
def gradio_app():
    """Assemble the Gradio Blocks interface and return it without launching.

    The page shows a heading followed by a single row of two columns: the
    first holds the query textbox and the "Search" button, the second holds
    the results textbox. Clicking the button passes the query text to
    ``search`` and writes its return value into the results textbox.
    """
    with gr.Blocks() as app:
        gr.Markdown("## FAISS Search Interface with Nomic Embedder")

        with gr.Row():
            # First column: user input.
            with gr.Column():
                query_box = gr.Textbox(label="Search Query", placeholder="Type your search query here")
                run_button = gr.Button("Search")

            # Second column: output area.
            with gr.Column():
                results_box = gr.Textbox(label="Search Results", lines=10)

        # Event wiring must happen while the Blocks context is still open.
        run_button.click(fn=search, inputs=[query_box], outputs=[results_box])

    return app
|
|
|
|
|
# Build the interface at module level so `demo` remains importable by name.
demo = gradio_app()

# Only start the web server when this file is executed as a script, so that
# importing the module does not launch a server as a side effect.
if __name__ == "__main__":
    demo.launch()
|
|