Nattyboi committed on
Commit
e295414
·
1 Parent(s): 5c6cdb1

added provider

Browse files
Files changed (1) hide show
  1. app.py +16 -0
app.py CHANGED
@@ -66,7 +66,21 @@ def extract_course_info(text: str) -> CourseRecommendation:
66
  return CourseRecommendation(courseName=coursename, completionTime=completiontime)
67
 
68
 
 
 
69
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  @app.get("/courses",tags=["Scrape"])
72
  def get_course(query):
@@ -79,11 +93,13 @@ def get_course(query):
79
  title = item.get('title')
80
  link = item.get('link')
81
  snippet = item.get('snippet')
 
82
  content_structure={}
83
 
84
  content_structure["courseTitle"]=title
85
  content_structure["courseLink"]=link
86
  content_structure["courseSnippet"]= snippet
 
87
  content_structure["scrapedCourseDetails"]= scrapeCourse(url=link)
88
  content.append(content_structure)
89
 
 
66
  return CourseRecommendation(courseName=coursename, completionTime=completiontime)
67
 
68
 
69
+ import re
70
+ from urllib.parse import urlparse
71
 
72
def extract_provider(url):
    """Return the provider name derived from the host of a course URL.

    The provider is the first dot-separated label of the URL's host, after
    dropping a leading ``www.`` so that ``https://www.udemy.com/...`` yields
    ``"udemy"`` rather than ``"www"``.

    Args:
        url: Absolute course URL. May be ``None`` or empty, e.g. when a
            search result has no link.

    Returns:
        The lower-cased provider label, or ``""`` when no host can be
        determined (missing URL, URL without a scheme, malformed netloc).
    """
    if not url:
        return ""

    try:
        # .hostname (unlike .netloc) excludes userinfo and port, and is
        # already lower-cased.
        host = urlparse(url).hostname or ""
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. unbalanced IPv6
        # brackets); treat those as "no provider" instead of crashing.
        return ""

    # "www" is a generic prefix, never the provider itself.
    if host.startswith("www."):
        host = host[len("www."):]

    return host.split(".")[0]
84
 
85
  @app.get("/courses",tags=["Scrape"])
86
  def get_course(query):
 
93
  title = item.get('title')
94
  link = item.get('link')
95
  snippet = item.get('snippet')
96
+ provider = extract_provider(link)
97
  content_structure={}
98
 
99
  content_structure["courseTitle"]=title
100
  content_structure["courseLink"]=link
101
  content_structure["courseSnippet"]= snippet
102
+ content_structure["provider"]= provider
103
  content_structure["scrapedCourseDetails"]= scrapeCourse(url=link)
104
  content.append(content_structure)
105