Spaces:

gouravgujariya
/

Web_to_json

Runtime error

App Files Files Community

Web_to_json / app.py

gouravgujariya

Update app.py

a7cdae2 over 1 year ago

raw

history blame contribute delete

1.82 kB

	import streamlit as st
	import requests
	from bs4 import BeautifulSoup
	import base64
	import json

	# Page header followed by a short note on supported sites and two sample URLs.
	st.title("Web Scraper with Streamlit")
	for intro_line in (
	    "This website is capable to webscrape the html based websites but not for Dynamic based",
	    "example:",
	    "1. https://books.toscrape.com/",
	    "2. http://quotes.toscrape.com",
	):
	    st.text(intro_line)

	# Address of the page to scrape, as typed by the user.
	url = st.text_input("Enter the URL of the website you want to scrape:")


	# Function to create a download link for a JSON file
	def get_binary_file_downloader_html(json_data, title):
	    """Return an HTML anchor that downloads *json_data* as a file.

	    The payload is embedded in the link itself as a base64 ``data:`` URI,
	    so no file has to be written on the server.

	    Args:
	        json_data: JSON text to embed. ``str`` is encoded as UTF-8;
	            ``bytes`` is embedded as given.
	        title: File name suggested to the browser through the anchor's
	            ``download`` attribute.

	    Returns:
	        An ``<a>`` element as a string.
	    """
	    import html  # local import: keeps this block self-contained

	    payload = json_data.encode("utf-8") if isinstance(json_data, str) else json_data
	    b64 = base64.b64encode(payload).decode("ascii")
	    # The title lands inside a double-quoted attribute, so quotes and angle
	    # brackets must be escaped to keep the markup well-formed.
	    safe_title = html.escape(title, quote=True)
	    # application/json is the registered media type for JSON.
	    return (
	        f'<a href="data:application/json;base64,{b64}" '
	        f'download="{safe_title}">Download JSON</a>'
	    )

	# Run the scrape only when the user presses the button.
	if st.button("Scrape Data"):
	    if not url:
	        st.error("Please enter a valid URL.")
	    else:
	        try:
	            # The timeout keeps the app from hanging on an unresponsive host.
	            response = requests.get(url, timeout=10)
	        except requests.RequestException as exc:
	            # Malformed URLs, connection failures and timeouts all derive
	            # from RequestException; report them instead of crashing the app.
	            st.error(f"Failed to retrieve the web page: {exc}")
	        else:
	            if response.status_code == 200:
	                # Parse the HTML content of the page.
	                soup = BeautifulSoup(response.text, "html.parser")

	                # Collect the text of every <span class="text"> element
	                # (e.g. the quotes on the example site listed above).
	                quotes = soup.find_all("span", class_="text")
	                extracted_data = [quote.get_text() for quote in quotes]

	                # Serialize the extracted strings as a JSON array.
	                json_data = json.dumps(extracted_data, indent=4)

	                # Show the JSON and offer it as a downloadable file.
	                st.markdown("### Extracted Data (JSON):")
	                st.text(json_data)
	                st.markdown("### Download JSON")
	                st.markdown(
	                    get_binary_file_downloader_html(json_data, "extracted_data.json"),
	                    unsafe_allow_html=True,
	                )
	            else:
	                # st.error takes a single message body, so the status code
	                # is formatted into the string rather than passed separately.
	                st.error(
	                    f"Failed to retrieve the web page. Status code: {response.status_code}"
	                )