import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import gradio as gr

# Load the SentenceTransformer model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Load the precomputed embeddings from the JSON file
with open('/content/drive/My Drive/final_data_with_embeddings.json', 'r') as f:
    data = json.load(f)

# Function to perform the search
def search_courses(user_query):
    query_embedding = model.encode(user_query)  # Embed the user query
    similarity_scores = []  # List of (score, course record) pairs

    # Compare the query embedding with each stored course embedding
    for dets in data:
        embed = np.array(dets['embedding'])
        similarity = cosine_similarity([query_embedding], [embed])
        similarity_scores.append((similarity[0][0], dets))

    # Sort the similarity scores in descending order
    similarity_scores.sort(key=lambda x: x[0], reverse=True)

    # Keep the top 4 courses
    top_4_dets = [item[1] for item in similarity_scores[:4]]

    # Format each course as a Markdown block
    results = []
    for i, det in enumerate(top_4_dets, 1):
        course_info = (
            f"{i}. "
            f"**Category**: {det['Course Category']}\n\n"
            f"**Course Name**: {det['Course Name']}\n\n"
            f"**Course URL**: {det['Course Url']}\n\n"
            f"**Description**: {det['Course Description']}\n\n"
        )
        results.append(course_info)

    return "\n\n\n".join(results)

# Create the Gradio interface
iface = gr.Interface(
    fn=search_courses,
    inputs="text",
    outputs="markdown",
    title="Course Search with Sentence Transformers",
    description="Enter a query to find the top 4 most similar courses.",
)

# Launch the Gradio app
iface.launch()
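
# A minimal sketch of what each record in final_data_with_embeddings.json is
# assumed to look like, based on the keys the script reads above. The values
# below are illustrative only; all-mpnet-base-v2 produces 768-dimensional
# embeddings.
#
# {
#     "Course Category": "Data Science",
#     "Course Name": "Intro to Machine Learning",
#     "Course Url": "https://example.com/courses/intro-ml",
#     "Course Description": "A beginner-friendly overview of supervised learning.",
#     "embedding": [0.012, -0.034, ...]  # 768 floats
# }
#
# Optional optimization (an assumption, not part of the original script):
# stacking all stored embeddings into one matrix lets cosine_similarity run in
# a single vectorized call instead of once per course, e.g.:
#
#     embedding_matrix = np.vstack([np.array(d['embedding']) for d in data])
#     scores = cosine_similarity([query_embedding], embedding_matrix)[0]
#     top_indices = np.argsort(scores)[::-1][:4]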