Abinivesh committed on
Commit
b1f8665
·
verified ·
1 Parent(s): c9aa314

Upload app_using_streamlit.py

Browse files
Files changed (1) hide show
  1. app_using_streamlit.py +89 -0
app_using_streamlit.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import torch
6
+ from transformers import BertTokenizer, BertModel
7
+ import numpy as np
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+
10
# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
# Timeout keeps the app from hanging indefinitely if the site is slow/unreachable;
# raise_for_status surfaces HTTP errors instead of silently parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

courses = []

# Extracting course title, image, and course link
for course_card in soup.find_all('header', class_='course-card__img-container'):
    img_tag = course_card.find('img', class_='course-card__img')

    if img_tag:
        title = img_tag.get('alt')
        image_url = img_tag.get('src')

        link_tag = course_card.find_previous('a')
        # Append only when a link was actually found: the original appended
        # unconditionally, so `course_link` could be a stale value from a
        # previous card (or a NameError on the first card without a link).
        if link_tag:
            course_link = link_tag.get('href')
            # Listing-page hrefs are relative; make them absolute.
            if not course_link.startswith('http'):
                course_link = 'https://courses.analyticsvidhya.com' + course_link

            courses.append({
                'title': title,
                'image_url': image_url,
                'course_link': course_link
            })

# Step 2: Create DataFrame (one row per scraped course)
df = pd.DataFrame(courses)
39
+
40
# Load pre-trained BERT model and tokenizer.
# NOTE(review): from_pretrained downloads weights on first run — this happens at
# import time on every cold start; consider caching if startup latency matters.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
43
+
44
# Function to generate embeddings using BERT
def get_bert_embedding(text):
    """Embed *text* as the mean-pooled BERT last hidden state.

    Returns a 2-D numpy array (batch axis first; a single string yields
    one row of hidden_size floats).
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # Average over the token (sequence) dimension to get one vector per input.
    pooled = hidden_states.mean(dim=1)
    return pooled.numpy()
50
+
51
# Create embeddings for course titles (computed once, up front).
# The function can be passed to apply directly — the lambda wrapper was redundant.
df['embedding'] = df['title'].apply(get_bert_embedding)
53
+
54
# Function to perform search using BERT-based similarity
def search_courses(query):
    """Rank courses by cosine similarity between *query* and course titles.

    Returns up to 10 dicts with keys 'title', 'image_url', 'course_link'
    and 'score' (cosine similarity, higher is more relevant).
    """
    query_embedding = get_bert_embedding(query)
    course_embeddings = np.vstack(df['embedding'].values)

    # Compute cosine similarity between query embedding and course embeddings
    similarities = cosine_similarity(query_embedding, course_embeddings).flatten()

    # Score on a copy via assign() — the original mutated the shared
    # module-level df (df['score'] = ...) as a hidden side effect of a query.
    ranked = df.assign(score=similarities)

    # Sort by similarity score in descending order and return top results
    top_results = ranked.sort_values(by='score', ascending=False).head(10)
    return top_results[['title', 'image_url', 'course_link', 'score']].to_dict(orient='records')
68
+
69
# Streamlit Interface
st.title("Analytics Vidhya Smart Course Search")
st.write("Find the most relevant courses from Analytics Vidhya based on your query.")

query = st.text_input("Enter your search query", placeholder="e.g., machine learning, data science, python")

if query:
    results = search_courses(query)
    if results:
        for item in results:
            course_title = item['title']
            course_image = item['image_url']
            course_link = item['course_link']
            # Cosine similarity rescaled to a 0-100 style "relevance" figure.
            relevance_score = round(item['score'] * 100, 2)

            # Some course cards have no image src; skip rendering instead of
            # crashing st.image on None.
            if course_image:
                st.image(course_image, width=300)
            st.markdown(f"### [{course_title}]({course_link})")
            st.write(f"Relevance: {relevance_score}%")
            st.markdown("---")
    else:
        st.write("No results found.")