"""Shiny app: BERT-based title search over Analytics Vidhya's free courses.

Importing this module has side effects: it downloads the course listing
page, extracts one record per course card, loads the pre-trained
``bert-base-uncased`` tokenizer and model, and embeds every course title.
The resulting ``app`` object serves a single-page search UI.
"""
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
import torch
from bs4 import BeautifulSoup
from shiny import App, render, ui
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertModel, BertTokenizer

# Columns every course record carries; also used so that an empty scrape
# still yields a DataFrame with the expected schema.
_COURSE_COLUMNS = ['title', 'image_url', 'course_link']

# Seconds to wait for the listing page before giving up at startup.
_REQUEST_TIMEOUT = 30


def _extract_courses(page, base_url):
    """Return a list of course dicts found in the parsed listing *page*.

    Each dict has the keys ``title``, ``image_url`` and ``course_link``.
    Cards without an image tag, without a title (the image ``alt`` text) or
    without a usable link are skipped, because a record missing any of them
    cannot be embedded or linked to.

    Args:
        page: A ``BeautifulSoup`` document for the course listing.
        base_url: URL the page was fetched from; relative hrefs are resolved
            against it.
    """
    found = []
    for card in page.find_all('header', class_='course-card__img-container'):
        img_tag = card.find('img', class_='course-card__img')
        if img_tag is None:
            continue

        title = img_tag.get('alt')
        if not title:
            continue

        # The card's link is the closest <a> that precedes it in the document.
        link_tag = card.find_previous('a')
        href = link_tag.get('href') if link_tag is not None else None
        if not href:
            continue

        found.append({
            'title': title,
            'image_url': img_tag.get('src'),
            # urljoin leaves absolute URLs alone and resolves relative ones.
            'course_link': urljoin(base_url, href),
        })
    return found


# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
response = requests.get(url, timeout=_REQUEST_TIMEOUT)
# Fail fast with a clear HTTP error instead of parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
courses = _extract_courses(soup, url)

# Step 2: Create DataFrame (explicit columns keep the schema when empty)
df = pd.DataFrame(courses, columns=_COURSE_COLUMNS)

# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')


def get_bert_embedding(text):
    """Embed *text* with BERT and return a 2-D NumPy array with one row.

    The embedding is the mean of the model's last hidden state over the
    token axis. Gradients are disabled because this is inference only.
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).numpy()


# Create embeddings for course titles
df['embedding'] = df['title'].apply(get_bert_embedding)

# Stack the per-course rows once instead of on every query. ``None`` marks an
# empty catalogue, for which np.vstack would raise.
_course_embeddings = None if df.empty else np.vstack(df['embedding'].to_list())


def search_courses(query, top_n=10):
    """Return the courses whose titles are most similar to *query*.

    Args:
        query: Free-text search string.
        top_n: Maximum number of results to return (default 10).

    Returns:
        A list of dicts with the keys ``title``, ``image_url``,
        ``course_link`` and ``score`` (cosine similarity), ordered by
        descending score. The list is empty when the query is blank or no
        courses were scraped.
    """
    if _course_embeddings is None or not query or not query.strip():
        return []

    query_embedding = get_bert_embedding(query)
    similarities = cosine_similarity(query_embedding, _course_embeddings).flatten()

    # Attach scores to a copy: the module-level DataFrame is shared by every
    # session and must not be mutated per request.
    ranked = (
        df[_COURSE_COLUMNS]
        .assign(score=similarities)
        .sort_values(by='score', ascending=False)
        .head(top_n)
    )
    return ranked.to_dict(orient='records')


# Stylesheet for the page. Built from adjacent literals so the emitted CSS
# text is unchanged while the source stays readable.
_APP_CSS = (
    " @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;500;700&display=swap');"
    " body { font-family: 'Poppins', sans-serif; background-color: #f4f6f9; }"
    " .container { padding: 20px; }"
    " h2 { color: #ff6f61; font-weight: 700; text-align: center; }"
    " .result-container { display: flex; flex-wrap: wrap; gap: 20px; justify-content: center; }"
    " .course-card { background-color: #ffffff; border-radius: 12px;"
    " box-shadow: 0 4px 10px rgba(0, 0, 0, 0.15); overflow: hidden;"
    " width: calc(50% - 10px); transition: transform 0.3s, box-shadow 0.3s; }"
    " .course-card:hover { transform: scale(1.05); box-shadow: 0 6px 20px rgba(0, 0, 0, 0.2); }"
    " .course-image { width: 100%; height: 180px; object-fit: cover;"
    " border-top-left-radius: 12px; border-top-right-radius: 12px; }"
    " .course-info { padding: 15px; }"
    " .course-info h3 { font-size: 20px; color: #333; margin-top: 0; }"
    " .course-info p { color: #666; font-size: 16px; margin-bottom: 10px; }"
    " .course-link { background-color: #ff6f61; color: white; padding: 8px 12px;"
    " text-decoration: none; border-radius: 6px; font-size: 15px; display: inline-block;"
    " margin-top: 10px; transition: background-color 0.2s; }"
    " .course-link:hover { background-color: #e85a50; }"
    " .no-results { text-align: center; color: #888; font-style: italic; }"
    " "
)

# Shiny UI and Server
app_ui = ui.page_fluid(
    ui.tags.style(_APP_CSS),
    ui.h2("Analytics Vidhya Smart Course Search"),
    ui.input_text(
        "query",
        "Enter your search query",
        placeholder="e.g., machine learning, data science, python",
    ),
    ui.output_text("search_info"),
    ui.output_ui("results"),
)


def _course_card(item):
    """Build the card UI for one search result dict from ``search_courses``."""
    relevance_score = round(item['score'] * 100, 2)
    return ui.div(
        ui.img(src=item['image_url'], class_="course-image"),
        ui.div(
            ui.h3(item['title']),
            ui.p(f"Relevance: {relevance_score}%"),
            ui.a("View Course", href=item['course_link'], target="_blank", class_="course-link"),
            class_="course-info",
        ),
        class_="course-card",
    )


def server(input, output, session):
    """Register the reactive outputs ``results`` and ``search_info``."""

    def _current_query():
        # Treat a whitespace-only box the same as an empty one.
        return (input.query() or "").strip()

    @output
    @render.ui
    def results():
        query = _current_query()
        if not query:
            return ui.p("Enter a search query to get started!", class_="no-results")

        # Perform the search
        matches = search_courses(query)
        if not matches:
            return ui.p("No results found.", class_="no-results")

        cards = [_course_card(item) for item in matches]
        return ui.div(*cards, class_="result-container")

    @output
    @render.text
    def search_info():
        query = _current_query()
        if query:
            return f"Results for '{query}'"
        return "Search for courses by typing a query above."


app = App(app_ui, server)