aswin-10 committed on
Commit
093e116
·
verified ·
1 Parent(s): e2a8154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -48
app.py CHANGED
@@ -3,107 +3,83 @@ from sentence_transformers import SentenceTransformer, util
3
  from transformers import pipeline
4
  import torch
5
  import gradio as gr
6
- import os
7
-
8
- # Use the relative path where the CSV is uploaded
9
- csv_file_path = os.path.join(os.getcwd(), 'Analytics_Vidhya_Free_Course_data.csv')
10
 
11
  # Load the dataset
 
12
  df = pd.read_csv(csv_file_path, encoding='ISO-8859-1')
 
13
 
14
- # Load the pre-trained model for embeddings (using SentenceTransformers)
15
  model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
16
 
17
  # Combine title and description to create a full text for each course
18
- df['full_text'] = df.iloc[:,0] + " " + df.iloc[:,1] + " " + df['Instructor Name'] + " " + str(df['Rating']) + " " + df['Category']
19
 
20
  # Convert full course texts into embeddings
21
  course_embeddings = model.encode(df['full_text'].tolist(), convert_to_tensor=True)
22
 
23
- # Function to expand the query using paraphrasing
 
 
24
  def expand_query(query):
25
  paraphraser = pipeline('text2text-generation', model='Vamsi/T5_Paraphrase_Paws')
26
  expanded_queries = paraphraser(query, num_return_sequences=3, max_length=50, do_sample=True)
27
  return [q['generated_text'] for q in expanded_queries]
28
 
29
- # Function to search for the most relevant courses
 
 
 
30
  def search_courses(query, level_filter=None, category_filter=None, top_k=3):
31
- # Step 1: Expand the query using paraphrasing
32
  expanded_queries = expand_query(query)
33
-
34
- # Step 2: Initialize an array to store all similarities
35
  all_similarities = []
36
 
37
  for expanded_query in expanded_queries:
38
- # Convert each expanded query into an embedding
39
  query_embedding = model.encode(expanded_query, convert_to_tensor=True)
40
-
41
- # Compute cosine similarities between the query embedding and course embeddings
42
  similarities = util.pytorch_cos_sim(query_embedding, course_embeddings)[0]
43
-
44
- # Append to the list of all similarities
45
  all_similarities.append(similarities)
46
 
47
- # Step 3: Convert the list of tensors to a single tensor by taking the maximum similarity for each course
48
  aggregated_similarities = torch.max(torch.stack(all_similarities), dim=0)[0]
49
-
50
- # Step 4: Apply filters
51
  filtered_df = df.copy()
52
- if level_filter:
 
53
  filtered_df = filtered_df[filtered_df['Level of Difficulty'] == level_filter]
54
- if category_filter:
55
  filtered_df = filtered_df[filtered_df['Category'] == category_filter]
56
-
57
  if filtered_df.empty:
58
  return "<p>No matching courses found.</p>"
59
-
60
- # Recalculate similarities for the filtered data
61
- filtered_similarities = aggregated_similarities[filtered_df.index]
62
 
63
- # Step 5: Get top_k most similar courses
64
  top_results = filtered_similarities.topk(k=min(top_k, len(filtered_similarities)))
65
 
66
- # Prepare the output as clickable links
67
  results = []
68
  for idx in top_results.indices:
69
  idx = int(idx)
70
  course_title = filtered_df.iloc[idx]['Course Title']
71
- course_description = filtered_df.iloc[idx,1]
72
- course_url = filtered_df.iloc[idx,-1]
73
-
74
-
75
- # Format the result as a clickable hyperlink using raw HTML
76
  course_link = f'<a href="{course_url}" target="_blank">{course_title}</a>'
77
- results.append(f"<strong>{course_link}</strong><br>{course_description}<br><br>")
78
 
79
- # Combine all results into an HTML formatted list
80
  return "<ol>" + "".join([f"<li>{result}</li>" for result in results]) + "</ol>"
81
 
82
- # Create Gradio UI
83
  def create_gradio_interface():
84
  with gr.Blocks() as demo:
85
  gr.Markdown("# Analytics Vidhya Free Courses")
86
  gr.Markdown("Enter your query and use filters to narrow down the search.")
87
-
88
- # Input elements
89
  query = gr.Textbox(label=" Search for a course", placeholder="Enter course topic or description")
90
-
91
- # Filters (in a collapsible form)
92
  with gr.Accordion(" Filters", open=False):
93
- level_filter = gr.Dropdown(choices=["Beginner", "Intermediate", "Advanced"], label=" Course Level", multiselect=False)
94
- category_filter = gr.Dropdown(choices=["Data Science", "Machine Learning", "Deep Learning", "AI", "NLP"], label=" Category", multiselect=False)
95
-
96
- # Search button
97
  search_button = gr.Button("Search")
98
-
99
- # Output HTML for displaying results
100
  output = gr.HTML(label="Search Results")
101
-
102
- # On button click, trigger the search function
103
  search_button.click(fn=search_courses, inputs=[query, level_filter, category_filter], outputs=output)
104
 
105
  return demo
106
 
107
  # Launch Gradio interface
108
  demo = create_gradio_interface()
109
- demo.launch(share=True, debug=True)
 
3
  from transformers import pipeline
4
  import torch
5
  import gradio as gr
6
+ import os
 
 
 
7
 
8
  # Load the dataset
9
+ csv_file_path = os.path.join(os.getcwd(), 'Analytics_Vidhya_Free_Course_data.csv')
10
  df = pd.read_csv(csv_file_path, encoding='ISO-8859-1')
11
+ df.fillna('', inplace=True)
12
 
13
+ # Load the pre-trained model for embeddings
14
  model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
15
 
16
  # Combine title and description to create a full text for each course
17
+ df['full_text'] = df.iloc[:, 0] + " " + df.iloc[:, 1] + " " + df['Instructor Name'] + " " + df['Rating'].astype(str) + " " + df['Category']
18
 
19
  # Convert full course texts into embeddings
20
  course_embeddings = model.encode(df['full_text'].tolist(), convert_to_tensor=True)
21
 
22
+ # Load a model for text generation (e.g., BART)
23
+ generator = pipeline('text2text-generation', model='facebook/bart-large-cnn')
24
+
25
  def expand_query(query):
26
  paraphraser = pipeline('text2text-generation', model='Vamsi/T5_Paraphrase_Paws')
27
  expanded_queries = paraphraser(query, num_return_sequences=3, max_length=50, do_sample=True)
28
  return [q['generated_text'] for q in expanded_queries]
29
 
30
+ def generate_description(query):
31
+ response = generator(query, max_length=100, num_return_sequences=1)
32
+ return response[0]['generated_text']
33
+
34
  def search_courses(query, level_filter=None, category_filter=None, top_k=3):
 
35
  expanded_queries = expand_query(query)
 
 
36
  all_similarities = []
37
 
38
  for expanded_query in expanded_queries:
 
39
  query_embedding = model.encode(expanded_query, convert_to_tensor=True)
 
 
40
  similarities = util.pytorch_cos_sim(query_embedding, course_embeddings)[0]
 
 
41
  all_similarities.append(similarities)
42
 
 
43
  aggregated_similarities = torch.max(torch.stack(all_similarities), dim=0)[0]
 
 
44
  filtered_df = df.copy()
45
+
46
+ if level_filter and level_filter != "Nil":
47
  filtered_df = filtered_df[filtered_df['Level of Difficulty'] == level_filter]
48
+ if category_filter and category_filter != "NIL":
49
  filtered_df = filtered_df[filtered_df['Category'] == category_filter]
50
+
51
  if filtered_df.empty:
52
  return "<p>No matching courses found.</p>"
 
 
 
53
 
54
+ filtered_similarities = aggregated_similarities[filtered_df.index]
55
  top_results = filtered_similarities.topk(k=min(top_k, len(filtered_similarities)))
56
 
 
57
  results = []
58
  for idx in top_results.indices:
59
  idx = int(idx)
60
  course_title = filtered_df.iloc[idx]['Course Title']
61
+ course_description = filtered_df.iloc[idx, 1]
62
+ course_url = filtered_df.iloc[idx, -1]
63
+ generated_description = generate_description(course_title + " " + course_description)
 
 
64
  course_link = f'<a href="{course_url}" target="_blank">{course_title}</a>'
65
+ results.append(f"<strong>{course_link}</strong><br>{course_description}<br>{generated_description}<br><br>")
66
 
 
67
  return "<ol>" + "".join([f"<li>{result}</li>" for result in results]) + "</ol>"
68
 
 
69
  def create_gradio_interface():
70
  with gr.Blocks() as demo:
71
  gr.Markdown("# Analytics Vidhya Free Courses")
72
  gr.Markdown("Enter your query and use filters to narrow down the search.")
 
 
73
  query = gr.Textbox(label=" Search for a course", placeholder="Enter course topic or description")
 
 
74
  with gr.Accordion(" Filters", open=False):
75
+ level_filter = gr.Dropdown(choices=["Beginner", "Intermediate", "Advanced", "Nil"], label=" Course Level", multiselect=False)
76
+ category_filter = gr.Dropdown(choices=["Data Science", "Machine Learning", "Deep Learning", "AI", "NLP", "NIL"], label=" Category", multiselect=False)
 
 
77
  search_button = gr.Button("Search")
 
 
78
  output = gr.HTML(label="Search Results")
 
 
79
  search_button.click(fn=search_courses, inputs=[query, level_filter, category_filter], outputs=output)
80
 
81
  return demo
82
 
83
  # Launch Gradio interface
84
  demo = create_gradio_interface()
85
+ demo.launch()