aswin-10 committed on
Commit
b21e81f
·
verified ·
1 Parent(s): c8e3926

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sentence_transformers import SentenceTransformer, util
3
+ from transformers import pipeline
4
+ import torch
5
+ import gradio as gr
6
+ import os
7
+
8
# Use the relative path where the CSV is uploaded
csv_file_path = os.path.join(os.getcwd(), 'Analytics_Vidhya_Free_Course_data.csv')

# Load the dataset
df = pd.read_csv(csv_file_path, encoding='ISO-8859-1')

# Load the pre-trained model for embeddings (using SentenceTransformers)
model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')


def _as_text(column):
    """Return *column* with every cell as a string; missing cells become ''."""
    return column.map(lambda value: '' if pd.isna(value) else str(value))


# Build one searchable text per course from the first two columns, the
# instructor, the rating and the category.
# Each column is converted cell by cell: calling str() on a whole Series yields
# the Series' printed representation (the same blob for every row), and a
# single missing cell would otherwise turn the whole concatenation into NaN.
df['full_text'] = (
    _as_text(df.iloc[:, 0])
    + " " + _as_text(df.iloc[:, 1])
    + " " + _as_text(df['Instructor Name'])
    + " " + _as_text(df['Rating'])
    + " " + _as_text(df['Category'])
)

# Convert full course texts into embeddings (one row per course, same order as df)
course_embeddings = model.encode(df['full_text'].tolist(), convert_to_tensor=True)
22
+
23
+ # Function to expand the query using paraphrasing
24
def expand_query(query):
    """Generate paraphrased variants of a search query.

    Args:
        query: The user's search text.

    Returns:
        A list with the ``generated_text`` of each sequence returned by the
        paraphrasing pipeline (three sampled sequences are requested).
    """
    # Building the pipeline loads the model, so do it once and keep it on the
    # function object instead of rebuilding it for every search.
    paraphraser = getattr(expand_query, "_paraphraser", None)
    if paraphraser is None:
        paraphraser = pipeline('text2text-generation', model='Vamsi/T5_Paraphrase_Paws')
        expand_query._paraphraser = paraphraser

    generated = paraphraser(query, num_return_sequences=3, max_length=50, do_sample=True)
    return [item['generated_text'] for item in generated]
28
+
29
+ # Function to search for the most relevant courses
30
def search_courses(query, level_filter=None, category_filter=None, top_k=3):
    """Return an HTML snippet listing the courses most similar to *query*.

    Args:
        query: Free-text search entered by the user.
        level_filter: If truthy, keep only rows whose 'Level of Difficulty'
            equals this value.
        category_filter: If truthy, keep only rows whose 'Category' equals
            this value.
        top_k: Maximum number of courses to return.

    Returns:
        An ``<ol>`` HTML string with one linked title and description per
        course, or a ``<p>`` message when there is nothing to show.
    """
    import html  # local import so this function does not depend on file-level changes

    if not query or not query.strip():
        return "<p>Please enter a search query.</p>"

    # Step 1: search with the user's own wording plus its paraphrases, so a
    # poor paraphrase can never hide an exact match. dict.fromkeys removes
    # duplicates while keeping order.
    queries = list(dict.fromkeys([query, *expand_query(query)]))

    # Step 2: encode all queries in one batch and score them against every
    # course; the result has one row per query and one column per course.
    # NOTE(review): the model name ends in "dot-v1", which suggests it was
    # tuned for dot-product scoring rather than cosine - confirm against the
    # model card before changing the score function.
    query_embeddings = model.encode(queries, convert_to_tensor=True)
    similarity_matrix = util.pytorch_cos_sim(query_embeddings, course_embeddings)

    # Step 3: keep, for each course, its best score over all query variants.
    aggregated_similarities = similarity_matrix.max(dim=0).values

    # Step 4: apply filters (rows are only selected, never modified, so no copy is needed).
    filtered_df = df
    if level_filter:
        filtered_df = filtered_df[filtered_df['Level of Difficulty'] == level_filter]
    if category_filter:
        filtered_df = filtered_df[filtered_df['Category'] == category_filter]

    if filtered_df.empty:
        return "<p>No matching courses found.</p>"

    # Translate the surviving index labels into row positions of df, which is
    # the order the course embeddings were computed in.
    row_positions = torch.as_tensor(
        df.index.get_indexer(filtered_df.index),
        dtype=torch.long,
        device=aggregated_similarities.device,
    )
    filtered_similarities = aggregated_similarities[row_positions]

    # Step 5: get the top_k most similar courses among the filtered rows.
    k = max(0, min(int(top_k), len(filtered_df)))
    top_results = filtered_similarities.topk(k=k)

    # The module-level setup appends 'full_text' as the last column of df, so
    # the last column is no longer the one from the CSV. Pick the last column
    # other than 'full_text' instead.
    # NOTE(review): assumes the CSV's final column holds the course URL - confirm.
    url_column = [name for name in filtered_df.columns if name != 'full_text'][-1]

    # Prepare the output as clickable links; values come from the data file,
    # so escape them before placing them in HTML.
    items = []
    for position in top_results.indices.tolist():
        row = filtered_df.iloc[position]
        course_title = html.escape(str(row['Course Title']))
        course_description = html.escape(str(row.iloc[1]))
        course_url = html.escape(str(row[url_column]), quote=True)

        course_link = f'<a href="{course_url}" target="_blank">{course_title}</a>'
        items.append(f"<li><strong>{course_link}</strong><br>{course_description}<br><br></li>")

    # Combine all results into an HTML formatted list
    return "<ol>" + "".join(items) + "</ol>"
81
+
82
+ # Create Gradio UI
83
def create_gradio_interface():
    """Assemble the course-search page and return the Gradio Blocks object.

    The page contains a query textbox, a collapsed accordion with two
    single-choice dropdown filters, a search button and an HTML results area.
    Clicking the button calls ``search_courses`` with the textbox and dropdown
    values and renders its return value in the results area.
    """
    level_choices = ["Beginner", "Intermediate", "Advanced"]
    category_choices = ["Data Science", "Machine Learning", "Deep Learning", "AI", "NLP"]

    with gr.Blocks() as app:
        # Page heading and short usage hint
        gr.Markdown("# Analytics Vidhya Free Courses")
        gr.Markdown("Enter your query and use filters to narrow down the search.")

        # Free-text query input
        search_box = gr.Textbox(
            label=" Search for a course",
            placeholder="Enter course topic or description",
        )

        # Optional filters, collapsed by default
        with gr.Accordion(" Filters", open=False):
            level_dropdown = gr.Dropdown(
                choices=level_choices,
                label=" Course Level",
                multiselect=False,
            )
            category_dropdown = gr.Dropdown(
                choices=category_choices,
                label=" Category",
                multiselect=False,
            )

        run_button = gr.Button("Search")

        # Area where the HTML returned by the search is rendered
        results_panel = gr.HTML(label="Search Results")

        # Wire the button to the search function
        run_button.click(
            fn=search_courses,
            inputs=[search_box, level_dropdown, category_dropdown],
            outputs=results_panel,
        )

    return app
106
+
107
+ # Launch Gradio interface
108
# Build the interface at import time so `demo` stays available as a
# module-level name, but only start the server when this file is executed as
# a script; importing the module no longer launches a web server.
demo = create_gradio_interface()

if __name__ == "__main__":
    demo.launch(share=True, debug=True)