Spaces:
Sleeping
Sleeping
import json | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import cosine_similarity | |
import numpy as np | |
import gradio as gr | |
# Load the sentence-embedding model used to encode incoming queries.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Load the course records (each one carries a precomputed 'embedding').
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale encoding, which differs between operating systems.
# NOTE(review): presumably the stored embeddings were produced by the same
# model as above -- confirm against whatever generated this file.
with open('./final_data_with_embeddings.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
# Function to perform the search
def search_courses(user_query: str) -> str:
    """Return the four stored courses most similar to *user_query*.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the ``'embedding'`` of every record in the
    module-level ``data``.

    Args:
        user_query: Free-text search query.

    Returns:
        A Markdown string with one numbered entry per match (category,
        name, URL and description), best match first. An empty string is
        returned when ``data`` holds no records.

    Raises:
        KeyError: If a record lacks ``'embedding'`` or, for a top-ranked
            record, one of the ``'Course ...'`` fields used in the output.
    """
    if not data:
        # Nothing to rank; also avoids passing an empty matrix to
        # scikit-learn, which rejects it.
        return ""

    top_n = 4
    query_embedding = model.encode(user_query)

    # Score every course in a single vectorized call rather than invoking
    # scikit-learn once per record inside a Python loop.
    embeddings = np.array([dets['embedding'] for dets in data])
    scores = cosine_similarity([query_embedding], embeddings)[0]

    # A stable sort on the negated scores puts the highest similarity first
    # while keeping equally scored records in their original order.
    best_indices = np.argsort(-scores, kind='stable')[:top_n]

    results = []
    for i, idx in enumerate(best_indices, 1):
        det = data[idx]
        course_info = (
            f"{i}. "
            f"**Category**: {det['Course Category']}\n\n"
            f"**Course Name**: {det['Course Name']}\n\n"
            f"**Course URL**: {det['Course Url']}\n\n"
            f"**Description**: {det['Course Description']}\n\n"
        )
        results.append(course_info)

    return "\n\n\n".join(results)
# Wire the search function into a simple web UI: one free-text input box,
# with the function's return value rendered as Markdown.
iface = gr.Interface(
    fn=search_courses,
    inputs="text",
    outputs="markdown",
    title="Course Search with Sentence Transformers",
    description="Enter a query to find the top 4 most similar courses.",
)

# Start serving the app.
iface.launch()