Abs6187 committed on
Commit
0300fda
·
verified ·
1 Parent(s): e28668f

Upload 7 files

Browse files
Files changed (7) hide show
  1. analytics_vidhya_courses_Final.xlsx +0 -0
  2. app.py +104 -0
  3. course_emb.pkl +3 -0
  4. gitattributes +35 -0
  5. main.py +73 -0
  6. requirements.txt +9 -0
  7. search.py +28 -0
analytics_vidhya_courses_Final.xlsx ADDED
Binary file (64.1 kB). View file
 
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load model and data
# NOTE(review): pickle.load executes arbitrary code if the file were tampered
# with — acceptable only because course_emb.pkl ships with this repo.
with open("course_emb.pkl", "rb") as f:
    course_emb = pickle.load(f)  # precomputed course embeddings; rows assumed aligned with df — TODO confirm

df = pd.read_excel("analytics_vidhya_courses_Final.xlsx")  # course catalogue (titles, descriptions, ...)
model = SentenceTransformer('all-MiniLM-L6-v2')  # encoder used for incoming queries
def search_courses(query, top_n=5):
    """Find the courses most semantically similar to a free-text query.

    Args:
        query: User's search text.
        top_n: How many results to return (default 5).

    Returns:
        A list of dicts with "title", "description" and "similarity" keys,
        ordered best match first — or a plain message string when the
        query is blank.
    """
    if not query.strip():
        return "Please enter a search query."

    query_vec = model.encode([query])
    sims = cosine_similarity(query_vec, course_emb)[0]
    # Indices of the top_n highest similarities, best first.
    ranked = sims.argsort()[::-1][:top_n]

    return [
        {
            "title": df.iloc[i]["Course Title"],
            "description": df.iloc[i]["Course Description"],
            "similarity": float(sims[i]),
        }
        for i in ranked
    ]
def gradio_interface(query):
    """Run a course search and render the results as styled HTML.

    Passes the blank-query message through unchanged; otherwise wraps each
    hit in a card showing rank, title, match score and description.
    """
    hits = search_courses(query)
    if isinstance(hits, str):
        # search_courses returned its "please enter a query" message.
        return hits

    # Format results as HTML with updated styling
    pieces = ["<div style='font-family: Inter, sans-serif;'>"]

    for rank, hit in enumerate(hits, 1):
        score_pct = int(hit['similarity'] * 100)
        pieces.append(f"""
        <div style='background: #f8f9fa; padding: 20px; margin: 15px 0; border-radius: 12px; box-shadow: 0 2px 6px rgba(0,0,0,0.05);'>
            <h3 style='color: #1a237e; margin: 0 0 12px 0; font-weight: 600;'>#{rank}. {hit['title']}</h3>
            <div style='color: #3949ab; font-size: 0.9em; margin-bottom: 10px; font-weight: 500;'>Match Score: {score_pct}%</div>
            <p style='color: #424242; margin: 0; line-height: 1.6;'>{hit['description']}</p>
        </div>
        """)

    pieces.append("</div>")
    return "".join(pieces)
# Create Gradio interface with improved styling
css = """
.gradio-container {
    font-family: 'Inter', sans-serif;
}
.gradio-button {
    background: linear-gradient(135deg, #3949ab, #1a237e) !important;
}
.gradio-button:hover {
    background: linear-gradient(135deg, #1a237e, #3949ab) !important;
}
"""

with gr.Blocks(css=css, theme="soft") as iface:
    gr.Markdown(
        """
        # 😻 Smart Learning Pathfinder
        Unlock your learning potential with AI-powered course recommendations tailored just for you!
        """
    )

    with gr.Row():
        query_input = gr.Textbox(
            label="What would you like to master?",
            placeholder="Tell us your learning interests (e.g., 'AI fundamentals' or 'data science for beginners')",
            scale=4
        )

    with gr.Row():
        search_button = gr.Button("✨ Discover Courses", variant="primary")

    with gr.Row():
        output = gr.HTML(label="Personalized Recommendations")

    # Button click runs the search handler and renders its HTML output.
    search_button.click(
        fn=gradio_interface,
        inputs=query_input,
        outputs=output,
    )

    gr.Markdown(
        """
        ### 💡 Optimization Tips:
        - Share your current knowledge level
        - Mention specific skills you want to develop
        - Include your learning preferences
        - Specify your target outcomes
        """
    )

# Launch only when executed as a script, so importing app.py (e.g. from a
# test or another module) does not start a web server as a side effect.
# share=True additionally exposes a public tunnel URL.
if __name__ == "__main__":
    iface.launch(share=True)
course_emb.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50af6e24422db342f6af21bbb5f495590a2cedb6f5cae013bd41ce5a64177c84
3
+ size 100003
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
main.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Scrape the Analytics Vidhya course catalogue (title, description,
# curriculum, link) from its paginated collection listing and save it
# to an Excel workbook.

base_url = "https://courses.analyticsvidhya.com/collections?page="
course_url_base = "https://courses.analyticsvidhya.com"

# Seconds before an HTTP request is abandoned. Without a timeout,
# requests.get can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

course_data = []

for page in range(1, 9):  # listing spans 8 pages at time of writing
    print(f"Scraping page {page}...")
    response = requests.get(base_url + str(page), timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')

    course_section = soup.find_all('div', class_="collections__product-cards collections__product-cards___0b9ab")
    if not course_section:
        print("No course section found, skipping this page.")
        continue

    courses = course_section[0].find_all('li')

    for course in courses:
        # Each card links to its own course page; skip cards without a link.
        link_tag = course.find('a', href=True)
        if not link_tag:
            continue
        course_relative_link = link_tag['href']
        course_link = course_url_base + course_relative_link

        course_response = requests.get(course_link, timeout=REQUEST_TIMEOUT)
        course_soup = BeautifulSoup(course_response.text, 'html.parser')

        # Course title from the page heading.
        title_tag = course_soup.find('h1', class_="section__heading")
        course_title = title_tag.get_text(strip=True) if title_tag else "N/A"

        # Description: every <p> inside any rich-text container, joined.
        description_tag = course_soup.find_all('div', class_="rich-text__container")
        course_description = " ".join([p.get_text(strip=True) for tag in description_tag for p in tag.find_all('p')]) if description_tag else "N/A"

        # Curriculum: chapter titles followed by their indented lesson items.
        curriculum_section = course_soup.find('div', class_="course-curriculum__container")
        if curriculum_section:
            curriculum_content = []

            chapters = curriculum_section.find_all('li', class_="course-curriculum__chapter")
            for chapter in chapters:
                title = chapter.find('h5', class_="course-curriculum__chapter-title")
                if title:
                    curriculum_content.append(title.get_text(strip=True))

                chapter_content = chapter.find('ul', class_="course-curriculum__chapter-content")
                if chapter_content:
                    curriculum_content.extend(
                        [f" - {item.get_text(strip=True)}" for item in chapter_content.find_all('li')]
                    )
            course_curriculum = "\n".join(curriculum_content) if curriculum_content else "N/A"
        else:
            course_curriculum = "N/A"

        course_data.append({
            "Course Title": course_title,
            "Course Description": course_description,
            "Course Curriculum": course_curriculum,
            "Link": course_link
        })

        time.sleep(1)  # throttle requests to be polite to the server

df = pd.DataFrame(course_data)
# TODO(review): hard-coded, user-specific absolute path — replace with a
# relative path or CLI argument before sharing this script.
file_path = r"C:\Users\rachi\OneDrive\Desktop\Analytics VIdya - Gen AI\analytics_vidhya_courses.xlsx"
df.to_excel(file_path, index=False)
print(f"Data saved to {file_path}")
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
requests>=2.31.0
beautifulsoup4>=4.12.0
pandas>=2.0.0
openpyxl>=3.1.2
sentence-transformers>=2.2.2
scikit-learn>=1.3.0
torch>=2.0.0
gradio>=5.9.1
numpy>=1.24.0
tqdm>=4.65.0
search.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the precomputed course embeddings and the catalogue they were built
# from (row order assumed to match the embedding rows — TODO confirm), plus
# the encoder used to embed incoming queries.
with open("course_emb.pkl", "rb") as f:
    course_emb = pickle.load(f)

df = pd.read_excel("analytics_vidhya_courses_Final.xlsx")

model = SentenceTransformer('all-MiniLM-L6-v2')

def search_courses(query, top_n=5):
    """Return the top_n catalogue rows most similar to *query*.

    Args:
        query: Free-text search query.
        top_n: Number of results to return (default 5).

    Returns:
        A DataFrame with the "Course Title" and "Course Description"
        columns of the best matches, ordered most similar first.
    """
    query_embedding = model.encode([query])

    similarities = cosine_similarity(query_embedding, course_emb)

    # Indices of the top_n highest similarities, best first.
    top_n_idx = similarities[0].argsort()[-top_n:][::-1]

    return df.iloc[top_n_idx][["Course Title", "Course Description"]]

# Guard the interactive CLI so importing this module does not block on
# input() or print to stdout as a side effect.
if __name__ == "__main__":
    query = input("Enter your search query: ")
    top_courses = search_courses(query)

    print("\nTop relevant courses:")
    for idx, row in top_courses.iterrows():
        print(f"Title: {row['Course Title']}")
        print(f"Description: {row['Course Description']}\n")