import os
import re

import requests
from bs4 import BeautifulSoup

# URL of the documentation index page to scrape.
url = 'https://help.storemate.cloud/docs/'


def _safe_filename(title):
    """Replace characters that are invalid in file names (e.g. '/', ':') with '_'."""
    return re.sub(r'[\\/*?:"<>|]', '_', title).strip()


def get_web_data(valid_links):
    """Fetch each doc page and save its title plus first section to a text file.

    Each page is written to ``user_guide/<title>.txt`` as the title, the
    section text, and a "more detail link" line pointing back at the page.

    Parameters
    ----------
    valid_links : iterable of str
        Absolute URLs of documentation pages to download.
    """
    # Bug fix: the output directory was never created, so the first run
    # crashed with FileNotFoundError.
    os.makedirs("user_guide", exist_ok=True)

    # Renamed loop variable: the original `url` shadowed the module-level
    # base URL constant.
    for page_url in valid_links:
        response = requests.get(page_url)
        response.raise_for_status()  # fail loudly instead of parsing an error page
        soup = BeautifulSoup(response.content, 'html.parser')

        # Bug fix: pages without an <h1> previously raised AttributeError;
        # skip them instead. Also avoids calling find('h1') twice.
        heading = soup.find('h1')
        if heading is None:
            continue
        title = heading.get_text()

        # The page body is assumed to be the first <div> after the <h1> —
        # matches the original scraping logic.
        section = heading.find_next('div')
        if section is None:
            continue
        section_text = section.get_text().strip()
        section_text += f"\nmore detail link : {page_url}"

        # Bug fix: use a context manager (handle leaked on write errors)
        # and an explicit encoding (was platform-dependent); sanitize the
        # title so '/' etc. cannot break the path.
        with open(f"user_guide/{_safe_filename(title)}.txt", "w", encoding="utf-8") as fh:
            fh.write(f"{title}\n{section_text}")


def get_base_links():
    """Collect every doc link on the index page and scrape each one.

    Returns
    -------
    str
        The literal status string ``"data updated"``.
    """
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    links = soup.find_all('a', href=True)
    # Bug fix: test the href attribute itself. The original checked
    # `url in str(link)`, which also matched anchor *text* containing the
    # base URL regardless of where the link pointed.
    valid_links = [link['href'] for link in links if url in link['href']]

    get_web_data(valid_links)
    return "data updated"


def update_user_guide():
    """Entry point: re-scrape the docs site and refresh all saved guide files."""
    get_base_links()