import streamlit as st
import requests
from bs4 import BeautifulSoup
import base64
import json

st.title("Web Scraper with Streamlit")

# User input for the URL
url = st.text_input("Enter the URL of the website you want to scrape:")

# Function to create an HTML download link for a JSON payload
def get_binary_file_downloader_html(json_data, title):
    # Encode the JSON string as base64 so it can be embedded in a data URI
    b64 = base64.b64encode(json_data.encode()).decode()
    href = f'<a href="data:file/json;base64,{b64}" download="{title}">Download JSON</a>'
    return href

if st.button("Scrape Data"):
    if not url:
        st.error("Please enter a valid URL.")
    else:
        # Send an HTTP GET request to the URL (timeout added so the app does not hang)
        response = requests.get(url, timeout=10)

        if response.status_code == 200:
            # Parse the HTML content of the page
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract data from the parsed HTML (e.g., quotes)
            quotes = soup.find_all("span", class_="text")

            # Collect the text of each extracted quote
            extracted_data = [quote.get_text() for quote in quotes]

            # Serialize the extracted data as pretty-printed JSON
            json_data = json.dumps(extracted_data, indent=4)

            # Show the data and provide a link to download it as a JSON file
            st.markdown("### Extracted Data (JSON):")
            st.text(json_data)
            st.markdown("### Download JSON")
            st.markdown(get_binary_file_downloader_html(json_data, "extracted_data.json"), unsafe_allow_html=True)
        else:
            st.error(f"Failed to retrieve the web page. Status code: {response.status_code}")
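
# Usage sketch (the filename app.py is an assumption, not stated above):
#   streamlit run app.py
# Then enter a URL whose quotes are wrapped in <span class="text"> elements,
# e.g. https://quotes.toscrape.com/, which matches the selector used above.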