File size: 3,973 Bytes
ae4467e 586e301 093e116 47e6638 093e116 e2a8154 093e116 586e301 093e116 586e301 093e116 586e301 a018cee 586e301 093e116 586e301 093e116 586e301 47e6638 093e116 586e301 093e116 586e301 093e116 586e301 47e6638 093e116 586e301 093e116 586e301 093e116 586e301 093e116 586e301 093e116 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import torch
import gradio as gr
import os
# Read the course catalogue from a CSV expected in the current working directory.
csv_file_path = os.path.join(os.getcwd(), 'Analytics_Vidhya_Free_Course_data.csv')
df = pd.read_csv(csv_file_path, encoding='ISO-8859-1')
# Replace missing cells with empty strings so the concatenation below never meets NaN.
df = df.fillna('')

# Sentence-embedding model shared by the course texts and the incoming queries.
model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')

# One searchable string per course: the first two columns (title and description,
# per the original author's note), then instructor, rating and category.
df['full_text'] = (
    df.iloc[:, 0]
    + " " + df.iloc[:, 1]
    + " " + df['Instructor Name']
    + " " + df['Rating'].astype(str)
    + " " + df['Category']
)

# Encode every course text once at start-up; searches reuse this tensor.
course_embeddings = model.encode(
    df['full_text'].tolist(),
    convert_to_tensor=True,
)

# Text-generation pipeline (BART) used to produce the extra blurb shown with each result.
generator = pipeline(task='text2text-generation', model='facebook/bart-large-cnn')
# Paraphrasing pipeline, built on first use and then shared by every call.
_paraphraser = None


def _get_paraphraser():
    """Return the shared paraphrasing pipeline, creating it on first use."""
    global _paraphraser
    if _paraphraser is None:
        # Constructing a pipeline loads the model weights; doing it per query
        # (as the original code did) makes every search pay that cost again.
        _paraphraser = pipeline('text2text-generation', model='Vamsi/T5_Paraphrase_Paws')
    return _paraphraser


def expand_query(query):
    """Return sampled paraphrases of ``query``.

    Args:
        query: The text to paraphrase.

    Returns:
        A list with the ``generated_text`` of each sequence the paraphrasing
        pipeline returns (three sequences are requested, each capped at
        ``max_length=50``). Sampling is enabled, so results vary between calls.
    """
    expanded_queries = _get_paraphraser()(
        query,
        num_return_sequences=3,
        max_length=50,
        do_sample=True,
    )
    return [q['generated_text'] for q in expanded_queries]
def generate_description(query):
    """Run ``query`` through the module-level ``generator`` pipeline.

    Args:
        query: Input text handed to the pipeline.

    Returns:
        The ``generated_text`` of the first (and only requested) output
        sequence; generation is capped at ``max_length=100``.
    """
    outputs = generator(query, max_length=100, num_return_sequences=1)
    first_output = outputs[0]
    return first_output['generated_text']
def search_courses(query, level_filter=None, category_filter=None, top_k=3):
    """Search the course catalogue and return the best matches as HTML.

    The query and its paraphrases (from ``expand_query``) are embedded with the
    module-level ``model`` and compared against ``course_embeddings``; each
    course keeps its best score over all query variants.

    Args:
        query: Free-text search string.
        level_filter: Exact value to match in the 'Level of Difficulty' column.
            ``None``, an empty string, or any casing of "nil" disables it.
        category_filter: Exact value to match in the 'Category' column.
            ``None``, an empty string, or any casing of "nil" disables it.
        top_k: Maximum number of courses to return.

    Returns:
        An HTML string: an ordered list of results, or a ``<p>`` message when
        the query is blank or no course passes the filters.
    """
    # Local import: the file's import block does not bring in ``html``.
    import html

    def _is_active(value):
        # The dropdowns use "Nil" / "NIL" as their "no filter" choice.
        return bool(value) and str(value).strip().lower() != "nil"

    if not query or not str(query).strip():
        return "<p>Please enter a search query.</p>"

    # Filter first so the models are not run when nothing can match.
    filtered_df = df
    if _is_active(level_filter):
        filtered_df = filtered_df[filtered_df['Level of Difficulty'] == level_filter]
    if _is_active(category_filter):
        filtered_df = filtered_df[filtered_df['Category'] == category_filter]
    if filtered_df.empty:
        return "<p>No matching courses found.</p>"

    # Score the original query as well as its paraphrases: sampled paraphrases
    # can drift, and this keeps the batch non-empty if none are returned.
    queries = [query, *expand_query(query)]
    query_embeddings = model.encode(queries, convert_to_tensor=True)
    similarity_matrix = util.pytorch_cos_sim(query_embeddings, course_embeddings)
    # Best score per course across all query variants.
    aggregated_similarities = similarity_matrix.max(dim=0).values

    # The surviving index labels double as row positions into the embedding
    # tensor. Assumes df keeps the default 0..n-1 index from read_csv.
    row_positions = filtered_df.index.to_list()
    filtered_similarities = aggregated_similarities[row_positions]
    k = max(0, min(top_k, len(row_positions)))
    top_results = filtered_similarities.topk(k=k)

    # 'full_text' is appended to df after loading, so the last column is no
    # longer the CSV's last column. Skip it when locating the link column.
    # NOTE(review): assumes the CSV's final column holds the course URL - confirm.
    url_column = [name for name in filtered_df.columns if name != 'full_text'][-1]

    results = []
    for idx in top_results.indices.tolist():
        # topk positions are relative to filtered_similarities, which is
        # aligned row-for-row with filtered_df, hence positional .iloc.
        row = filtered_df.iloc[idx]
        course_title = str(row['Course Title'])
        course_description = str(row.iloc[1])
        course_url = str(row[url_column])
        generated_description = generate_description(course_title + " " + course_description)

        # Escape everything interpolated into markup: CSV text and model
        # output may contain '<', '&' or quotes.
        safe_title = html.escape(course_title)
        safe_description = html.escape(course_description)
        safe_generated = html.escape(generated_description)
        safe_url = html.escape(course_url, quote=True)

        course_link = f'<a href="{safe_url}" target="_blank">{safe_title}</a>'
        results.append(
            f"<strong>{course_link}</strong><br>{safe_description}<br>{safe_generated}<br><br>"
        )

    return "<ol>" + "".join(f"<li>{result}</li>" for result in results) + "</ol>"
def create_gradio_interface():
    """Build the Gradio Blocks UI for the course search and return it.

    The layout is a heading, a query textbox, a collapsed accordion with the
    level and category dropdowns, a search button, and an HTML output area.
    Clicking the button calls ``search_courses`` with the three input values.
    """
    level_choices = ["Beginner", "Intermediate", "Advanced", "Nil"]
    category_choices = ["Data Science", "Machine Learning", "Deep Learning", "AI", "NLP", "NIL"]

    with gr.Blocks() as demo:
        gr.Markdown("# Analytics Vidhya Free Courses")
        gr.Markdown("Enter your query and use filters to narrow down the search.")

        query_box = gr.Textbox(
            label=" Search for a course",
            placeholder="Enter course topic or description",
        )

        # Optional filters stay collapsed until the user opens them.
        with gr.Accordion(" Filters", open=False):
            level_dropdown = gr.Dropdown(
                choices=level_choices,
                label=" Course Level",
                multiselect=False,
            )
            category_dropdown = gr.Dropdown(
                choices=category_choices,
                label=" Category",
                multiselect=False,
            )

        run_button = gr.Button("Search")
        results_html = gr.HTML(label="Search Results")

        # Input order must match search_courses(query, level_filter, category_filter).
        run_button.click(
            fn=search_courses,
            inputs=[query_box, level_dropdown, category_dropdown],
            outputs=results_html,
        )

    return demo
# Build the interface and start serving it. The module-level name `demo` is kept.
demo = create_gradio_interface()
demo.launch()