Abinivesh committed on
Commit
66c5462
·
verified ·
1 Parent(s): 322b202

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -89
app.py DELETED
@@ -1,89 +0,0 @@
1
- import requests
2
- from bs4 import BeautifulSoup
3
- import pandas as pd
4
- import streamlit as st
5
- import torch
6
- from transformers import BertTokenizer, BertModel
7
- import numpy as np
8
- from sklearn.metrics.pairwise import cosine_similarity
9
-
10
# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
# Fail fast on network errors instead of silently parsing an error page,
# and never hang forever on a dead connection.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

courses = []

# Extracting course title, image, and course link
for course_card in soup.find_all('header', class_='course-card__img-container'):
    img_tag = course_card.find('img', class_='course-card__img')
    if not img_tag:
        continue

    title = img_tag.get('alt')
    image_url = img_tag.get('src')

    # The card's <a> wrapper precedes the <header> element in the DOM.
    link_tag = course_card.find_previous('a')
    if not link_tag:
        # Bug fix: previously `course_link` was unbound (NameError) or stale
        # from an earlier iteration when a card had no link, yet the record
        # was appended anyway. Skip link-less cards instead.
        continue
    course_link = link_tag.get('href')
    if not course_link.startswith('http'):
        course_link = 'https://courses.analyticsvidhya.com' + course_link

    courses.append({
        'title': title,
        'image_url': image_url,
        'course_link': course_link
    })

# Step 2: Create DataFrame
df = pd.DataFrame(courses)
39
-
40
# Load pre-trained BERT model and tokenizer (shared checkpoint name hoisted
# so tokenizer and weights can never drift apart).
_BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
model = BertModel.from_pretrained(_BERT_CHECKPOINT)
43
-
44
# Function to generate embeddings using BERT
def get_bert_embedding(text):
    """Embed *text* as one vector: the mean of BERT's last hidden states.

    Uses the module-level `tokenizer` and `model`. Returns a NumPy array
    (batch dimension of 1 is kept, as produced by the tokenizer).
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        output = model(**encoded)
    pooled = output.last_hidden_state.mean(dim=1)
    return pooled.numpy()
50
-
51
# Create embeddings for course titles.
# Idiom fix: pass the function directly — the `lambda x: get_bert_embedding(x)`
# wrapper added nothing.
df['embedding'] = df['title'].apply(get_bert_embedding)
53
-
54
# Function to perform search using BERT-based similarity
def search_courses(query):
    """Rank courses by semantic similarity between *query* and course titles.

    Parameters
    ----------
    query : str
        Free-text search query typed by the user.

    Returns
    -------
    list of dict
        Up to 10 records with keys 'title', 'image_url', 'course_link' and
        'score' (cosine similarity), best match first.
    """
    query_embedding = get_bert_embedding(query)
    course_embeddings = np.vstack(df['embedding'].values)

    # Compute cosine similarity between query embedding and course embeddings
    similarities = cosine_similarity(query_embedding, course_embeddings).flatten()

    # Fix: use assign() so the shared module-level DataFrame is NOT mutated
    # on every search (the original wrote df['score'] in place). Nothing else
    # in this script reads that column, so callers are unaffected.
    ranked = df.assign(score=similarities).sort_values(by='score', ascending=False)

    # Return the top results as plain records for the UI layer.
    top_results = ranked.head(10)
    return top_results[['title', 'image_url', 'course_link', 'score']].to_dict(orient='records')
68
-
69
# Streamlit Interface
st.title("Analytics Vidhya Smart Course Search")
st.write("Find the most relevant courses from Analytics Vidhya based on your query.")

query = st.text_input("Enter your search query", placeholder="e.g., machine learning, data science, python")

if query:
    results = search_courses(query)
    if not results:
        st.write("No results found.")
    else:
        # Render one card per match: thumbnail, linked title, relevance score.
        for item in results:
            st.image(item['image_url'], width=300)
            st.markdown(f"### [{item['title']}]({item['course_link']})")
            st.write(f"Relevance: {round(item['score'] * 100, 2)}%")
            st.markdown("---")