Web_to_json / app.py
gouravgujariya's picture
Update app.py
6ffad4f
raw
history blame
1.62 kB
import streamlit as st
import requests
from bs4 import BeautifulSoup
import base64
import json
# Page heading rendered at the top of the app.
st.title(body="Web Scraper with Streamlit")

# Text box for the address of the page to scrape; the entered value is kept
# in `url` for the scraping step further down.
url = st.text_input(label="Enter the URL of the website you want to scrape:")
# Function to create a download link for a JSON file
def get_binary_file_downloader_html(json_data: str, title: str) -> str:
    """Return an HTML anchor that downloads *json_data* as a file.

    The JSON text is embedded directly in the link as a base64 ``data:`` URI,
    so no file has to be written on the server.

    Args:
        json_data: The JSON document, as text.
        title: File name placed in the anchor's ``download`` attribute.

    Returns:
        The ``<a>`` element as a string, labelled "Download JSON".
    """
    from html import escape

    # Encode explicitly as UTF-8 so the result does not depend on defaults.
    payload = base64.b64encode(json_data.encode("utf-8")).decode("ascii")
    # Escape the file name: it lands inside a double-quoted HTML attribute,
    # where a stray quote or angle bracket would break (or inject) markup.
    safe_title = escape(title, quote=True)
    # "application/json" is the registered media type for JSON.
    return (
        f'<a href="data:application/json;base64,{payload}" '
        f'download="{safe_title}">Download JSON</a>'
    )
# Scrape only when the user clicks the button; Streamlit re-runs the whole
# script on every interaction, so this branch is the click handler.
if st.button("Scrape Data"):
    # Treat a whitespace-only entry the same as an empty one.
    target_url = url.strip() if url else ""
    if not target_url:
        st.error("Please enter a valid URL.")
    else:
        try:
            # Send an HTTP GET request to the URL. The timeout keeps the app
            # from hanging indefinitely on an unresponsive host.
            response = requests.get(target_url, timeout=10)
        except requests.RequestException as exc:
            # Report request failures (e.g. a malformed URL, a connection
            # error or a timeout) in the UI instead of crashing the script.
            st.error(f"Failed to retrieve the web page: {exc}")
        else:
            if response.status_code == 200:
                # Parse the HTML content of the page
                soup = BeautifulSoup(response.text, "html.parser")
                # Extract data from the parsed HTML: the text of every
                # <span class="text"> element (e.g., quotes)
                quotes = soup.find_all("span", class_="text")
                extracted_data = [quote.get_text() for quote in quotes]
                # Serialize the extracted strings as a JSON array
                json_data = json.dumps(extracted_data, indent=4)
                # Show the JSON and provide a link to download it
                st.markdown("### Extracted Data (JSON):")
                st.text(json_data)
                st.markdown("### Download JSON")
                st.markdown(
                    get_binary_file_downloader_html(json_data, "extracted_data.json"),
                    unsafe_allow_html=True,
                )
            else:
                # st.error takes a single message body, so the status code
                # must be formatted into the string rather than passed as a
                # second positional argument.
                st.error(
                    "Failed to retrieve the web page. "
                    f"Status code: {response.status_code}"
                )