Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files
- analytics_vidhya_courses_Final.xlsx +0 -0
- app.py +104 -0
- course_emb.pkl +3 -0
- gitattributes +35 -0
- main.py +73 -0
- requirements.txt +9 -0
- search.py +28 -0
analytics_vidhya_courses_Final.xlsx
ADDED
Binary file (64.1 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pickle
|
3 |
+
import pandas as pd
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
+
|
7 |
+
# Load model and data
|
8 |
+
with open("course_emb.pkl", "rb") as f:
|
9 |
+
course_emb = pickle.load(f)
|
10 |
+
|
11 |
+
df = pd.read_excel("analytics_vidhya_courses_Final.xlsx")
|
12 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
13 |
+
|
14 |
+
def search_courses(query, top_n=5):
|
15 |
+
if not query.strip():
|
16 |
+
return "Please enter a search query."
|
17 |
+
|
18 |
+
query_embedding = model.encode([query])
|
19 |
+
similarities = cosine_similarity(query_embedding, course_emb)
|
20 |
+
top_n_idx = similarities[0].argsort()[-top_n:][::-1]
|
21 |
+
|
22 |
+
results = []
|
23 |
+
for idx in top_n_idx:
|
24 |
+
course = df.iloc[idx]
|
25 |
+
results.append({
|
26 |
+
"title": course["Course Title"],
|
27 |
+
"description": course["Course Description"],
|
28 |
+
"similarity": float(similarities[0][idx])
|
29 |
+
})
|
30 |
+
return results
|
31 |
+
|
32 |
+
def gradio_interface(query):
|
33 |
+
results = search_courses(query)
|
34 |
+
if isinstance(results, str):
|
35 |
+
return results
|
36 |
+
|
37 |
+
# Format results as HTML with updated styling
|
38 |
+
html_output = "<div style='font-family: Inter, sans-serif;'>"
|
39 |
+
|
40 |
+
for i, course in enumerate(results, 1):
|
41 |
+
relevance = int(course['similarity'] * 100)
|
42 |
+
html_output += f"""
|
43 |
+
<div style='background: #f8f9fa; padding: 20px; margin: 15px 0; border-radius: 12px; box-shadow: 0 2px 6px rgba(0,0,0,0.05);'>
|
44 |
+
<h3 style='color: #1a237e; margin: 0 0 12px 0; font-weight: 600;'>#{i}. {course['title']}</h3>
|
45 |
+
<div style='color: #3949ab; font-size: 0.9em; margin-bottom: 10px; font-weight: 500;'>Match Score: {relevance}%</div>
|
46 |
+
<p style='color: #424242; margin: 0; line-height: 1.6;'>{course['description']}</p>
|
47 |
+
</div>
|
48 |
+
"""
|
49 |
+
|
50 |
+
html_output += "</div>"
|
51 |
+
return html_output
|
52 |
+
|
53 |
+
# Create Gradio interface with improved styling
|
54 |
+
css = """
|
55 |
+
.gradio-container {
|
56 |
+
font-family: 'Inter', sans-serif;
|
57 |
+
}
|
58 |
+
.gradio-button {
|
59 |
+
background: linear-gradient(135deg, #3949ab, #1a237e) !important;
|
60 |
+
}
|
61 |
+
.gradio-button:hover {
|
62 |
+
background: linear-gradient(135deg, #1a237e, #3949ab) !important;
|
63 |
+
}
|
64 |
+
"""
|
65 |
+
|
66 |
+
with gr.Blocks(css=css, theme="soft") as iface:
|
67 |
+
gr.Markdown(
|
68 |
+
"""
|
69 |
+
# 😻 Smart Learning Pathfinder
|
70 |
+
Unlock your learning potential with AI-powered course recommendations tailored just for you!
|
71 |
+
"""
|
72 |
+
)
|
73 |
+
|
74 |
+
with gr.Row():
|
75 |
+
query_input = gr.Textbox(
|
76 |
+
label="What would you like to master?",
|
77 |
+
placeholder="Tell us your learning interests (e.g., 'AI fundamentals' or 'data science for beginners')",
|
78 |
+
scale=4
|
79 |
+
)
|
80 |
+
|
81 |
+
with gr.Row():
|
82 |
+
search_button = gr.Button("✨ Discover Courses", variant="primary")
|
83 |
+
|
84 |
+
with gr.Row():
|
85 |
+
output = gr.HTML(label="Personalized Recommendations")
|
86 |
+
|
87 |
+
search_button.click(
|
88 |
+
fn=gradio_interface,
|
89 |
+
inputs=query_input,
|
90 |
+
outputs=output,
|
91 |
+
)
|
92 |
+
|
93 |
+
gr.Markdown(
|
94 |
+
"""
|
95 |
+
### 💡 Optimization Tips:
|
96 |
+
- Share your current knowledge level
|
97 |
+
- Mention specific skills you want to develop
|
98 |
+
- Include your learning preferences
|
99 |
+
- Specify your target outcomes
|
100 |
+
"""
|
101 |
+
)
|
102 |
+
|
103 |
+
# Launch the interface
|
104 |
+
iface.launch(share=True)
|
course_emb.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50af6e24422db342f6af21bbb5f495590a2cedb6f5cae013bd41ce5a64177c84
|
3 |
+
size 100003
|
gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
main.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
|
6 |
+
base_url = "https://courses.analyticsvidhya.com/collections?page="
|
7 |
+
course_url_base = "https://courses.analyticsvidhya.com"
|
8 |
+
|
9 |
+
course_data = []
|
10 |
+
|
11 |
+
for page in range(1,9):
|
12 |
+
print(f"Scraping page {page}...")
|
13 |
+
response = requests.get(base_url + str(page))
|
14 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
15 |
+
|
16 |
+
course_section = soup.find_all('div', class_="collections__product-cards collections__product-cards___0b9ab")
|
17 |
+
if not course_section:
|
18 |
+
print("No course section found, skipping this page.")
|
19 |
+
continue
|
20 |
+
|
21 |
+
courses = course_section[0].find_all('li')
|
22 |
+
|
23 |
+
for course in courses:
|
24 |
+
link_tag = course.find('a', href=True)
|
25 |
+
if not link_tag:
|
26 |
+
continue
|
27 |
+
course_relative_link = link_tag['href']
|
28 |
+
course_link = course_url_base + course_relative_link
|
29 |
+
|
30 |
+
course_response = requests.get(course_link)
|
31 |
+
course_soup = BeautifulSoup(course_response.text, 'html.parser')
|
32 |
+
|
33 |
+
title_tag = course_soup.find('h1', class_="section__heading")
|
34 |
+
if title_tag:
|
35 |
+
course_title = title_tag.get_text(strip=True)
|
36 |
+
else:
|
37 |
+
course_title = "N/A"
|
38 |
+
|
39 |
+
description_tag = course_soup.find_all('div', class_="rich-text__container")
|
40 |
+
course_description = " ".join([p.get_text(strip=True) for tag in description_tag for p in tag.find_all('p')]) if description_tag else "N/A"
|
41 |
+
|
42 |
+
curriculum_section = course_soup.find('div', class_="course-curriculum__container")
|
43 |
+
if curriculum_section:
|
44 |
+
curriculum_content = []
|
45 |
+
|
46 |
+
chapters = curriculum_section.find_all('li', class_="course-curriculum__chapter")
|
47 |
+
for chapter in chapters:
|
48 |
+
title = chapter.find('h5', class_="course-curriculum__chapter-title")
|
49 |
+
if title:
|
50 |
+
curriculum_content.append(title.get_text(strip=True))
|
51 |
+
|
52 |
+
chapter_content = chapter.find('ul', class_="course-curriculum__chapter-content")
|
53 |
+
if chapter_content:
|
54 |
+
curriculum_content.extend(
|
55 |
+
[f" - {item.get_text(strip=True)}" for item in chapter_content.find_all('li')]
|
56 |
+
)
|
57 |
+
course_curriculum = "\n".join(curriculum_content) if curriculum_content else "N/A"
|
58 |
+
else:
|
59 |
+
course_curriculum = "N/A"
|
60 |
+
|
61 |
+
course_data.append({
|
62 |
+
"Course Title": course_title,
|
63 |
+
"Course Description": course_description,
|
64 |
+
"Course Curriculum": course_curriculum,
|
65 |
+
"Link": course_link
|
66 |
+
})
|
67 |
+
|
68 |
+
time.sleep(1)
|
69 |
+
|
70 |
+
df = pd.DataFrame(course_data)
|
71 |
+
file_path = r"C:\Users\rachi\OneDrive\Desktop\Analytics VIdya - Gen AI\analytics_vidhya_courses.xlsx"
|
72 |
+
df.to_excel(file_path, index=False)
|
73 |
+
print(f"Data saved to {file_path}")
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
requests>=2.31.0
|
2 |
+
beautifulsoup4>=4.12.0
|
3 |
+
pandas>=2.0.0
|
4 |
+
openpyxl>=3.1.2
|
5 |
+
sentence-transformers>=2.2.2
|
6 |
+
torch>=2.0.0
|
7 |
+
gradio>=5.9.1
|
8 |
+
numpy>=1.24.0
|
9 |
+
tqdm>=4.65.0
|
search.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import pandas as pd
|
3 |
+
from sentence_transformers import SentenceTransformer
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
|
6 |
+
with open("course_emb.pkl", "rb") as f:
|
7 |
+
course_emb = pickle.load(f)
|
8 |
+
|
9 |
+
df = pd.read_excel("analytics_vidhya_courses_Final.xlsx")
|
10 |
+
|
11 |
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
12 |
+
|
13 |
+
def search_courses(query, top_n=5):
|
14 |
+
query_embedding = model.encode([query])
|
15 |
+
|
16 |
+
similarities = cosine_similarity(query_embedding, course_emb)
|
17 |
+
|
18 |
+
top_n_idx = similarities[0].argsort()[-top_n:][::-1]
|
19 |
+
|
20 |
+
return df.iloc[top_n_idx][["Course Title", "Course Description"]]
|
21 |
+
|
22 |
+
query = input("Enter your search query: ")
|
23 |
+
top_courses = search_courses(query)
|
24 |
+
|
25 |
+
print("\nTop relevant courses:")
|
26 |
+
for idx, row in top_courses.iterrows():
|
27 |
+
print(f"Title: {row['Course Title']}")
|
28 |
+
print(f"Description: {row['Course Description']}\n")
|