Arafath10 committed
Commit 0e27472 · verified · 1 Parent(s): 875ce0c

Create user_guide_sync.py

Files changed (1):
user_guide_sync.py +48 -0
user_guide_sync.py ADDED
@@ -0,0 +1,48 @@
+ import requests
+ from bs4 import BeautifulSoup
+ 
+ # URL of the documentation index page to scrape
+ url = 'https://help.storemate.cloud/docs/'
+ 
+ 
+ def get_web_data(valid_links):
+     for page_url in valid_links:
+         # Send a GET request to the article URL
+         response = requests.get(page_url)
+ 
+         # Parse the page content with BeautifulSoup
+         soup = BeautifulSoup(response.content, 'html.parser')
+ 
+         # The article title is the first <h1> on the page
+         title = soup.find('h1').get_text()
+ 
+         # The article body is the first <div> following the <h1>
+         section = soup.find('h1').find_next('div')
+         # Extract the text content from the section
+         section_text = section.get_text().strip()
+         section_text = section_text + f"\nmore detail link : {page_url}"
+ 
+         # Write one text file per article, named after its title
+         with open(f"{title}.txt", "w") as file:
+             file.write(f"{title}\n{section_text}")
+ 
+ 
+ def get_base_links():
+     # Send a GET request to the index URL
+     response = requests.get(url)
+ 
+     # Parse the page content with BeautifulSoup
+     soup = BeautifulSoup(response.content, 'html.parser')
+ 
+     # Find all <a> tags with href attributes
+     links = soup.find_all('a', href=True)
+ 
+     valid_links = []
+     # Keep only links that point back into the docs section
+     for link in links:
+         if link['href'].startswith(url):
+             valid_links.append(link['href'])
+ 
+     get_web_data(valid_links)
+ 
+     return "data updated"
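For context, a minimal usage sketch (not part of the commit; it assumes the file is importable as user_guide_sync and that the requests and beautifulsoup4 packages are installed): calling get_base_links() crawls the docs index, fetches every linked article, and writes one "<title>.txt" file per article into the current working directory.

import user_guide_sync  # assumption: user_guide_sync.py is on the import path

# Crawl the index page, then fetch and save each linked article as a text file.
status = user_guide_sync.get_base_links()
print(status)  # prints "data updated" once all pages have been written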