"""Gradio front end for semantic search over a prebuilt FAISS index.

A query is embedded with the OpenAI embeddings endpoint, the nearest chunks
are looked up in the FAISS index, and the matching chunks are grouped by
document title and rendered as Markdown.
"""

import os
import pickle

import faiss
import gradio as gr
import numpy as np
import openai

# SECURITY: the API key must never be committed to source control. It is read
# from the environment instead. Any key that was previously hard-coded in this
# file must be considered leaked and should be revoked and rotated.
openai.api_key = os.environ.get("OPENAI_API_KEY")

EMBEDDING_MODEL = "text-embedding-ada-002"
INDEX_PATH = "faiss_Recursive100_index.bin"
METADATA_PATH = "metadata_Recursive100.pkl"


def embed_text(text):
    """Return the OpenAI embedding of *text* as a 1-D float32 NumPy array.

    float32 is used because that is the dtype FAISS operates on; a plain
    ``np.array`` of Python floats would be float64.
    """
    response = openai.Embedding.create(input=text, model=EMBEDDING_MODEL)
    return np.asarray(response["data"][0]["embedding"], dtype=np.float32)


# Load the FAISS index.
index = faiss.read_index(INDEX_PATH)

# Load the per-vector metadata; each entry is read below via its 'title' and
# 'chunk' keys.
# NOTE(security): pickle executes arbitrary code while loading. Only load a
# metadata file that was produced by a trusted process.
with open(METADATA_PATH, "rb") as f:
    metadata = pickle.load(f)


def semantic_search(query, k=5):
    """Search the index for *query* and return the hits as Markdown.

    Args:
        query: Free-text query entered by the user.
        k: Number of nearest chunks to retrieve. Coerced to an integer of at
            least 1, since the UI slider may hand over a non-integer number.

    Returns:
        A Markdown string with one section per distinct title (in order of
        first appearance among the hits), each containing that title's
        matching chunks joined by spaces. A short message is returned when
        the query is blank or nothing matched.
    """
    if query is None or not str(query).strip():
        return "Please enter a query."

    k = max(1, int(k))

    # FAISS expects a 2-D (n_queries, dim) float32 array.
    query_vector = np.asarray(embed_text(query), dtype=np.float32).reshape(1, -1)
    _, indices = index.search(query_vector, k)

    results_by_title = {}
    for idx in indices[0]:
        # FAISS pads the result with -1 when fewer than k neighbours exist;
        # using it as a key would silently select the wrong metadata entry.
        if idx < 0:
            continue
        entry = metadata[int(idx)]
        results_by_title.setdefault(entry['title'], []).append(entry['chunk'])

    if not results_by_title:
        return "No results found."

    separator = '-' * 50
    return "".join(
        f"**Title**: {title}\n\n**Full Text**: {' '.join(chunks)}\n\n{separator}\n\n"
        for title, chunks in results_by_title.items()
    )


# Create the Gradio interface.
interface = gr.Interface(
    fn=semantic_search,
    inputs=[
        gr.Textbox(label="Enter your query"),
        gr.Slider(1, 10, value=5, step=1, label="Number of Results"),
    ],
    outputs="markdown",
    title="Semantic Search with FAISS",
    description="Enter a query to search the indexed text using FAISS.",
)

# Launch the interface only when run as a script, so importing this module
# (e.g. to reuse semantic_search) does not start a web server.
if __name__ == "__main__":
    interface.launch()