File size: 1,608 Bytes
271db44
 
 
 
 
 
 
 
 
 
5a381fc
 
 
 
 
 
 
 
271db44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import base64
import html
import json

import requests
import streamlit as st
from bs4 import BeautifulSoup

# Page heading rendered at the top of the app.
st.title(body="Web Scraper with Streamlit")

# Address of the page to scrape, as typed by the user into the text box.
url = st.text_input(label="Enter the URL of the website you want to scrape:")


# Function to create a download link for a JSON file
def get_binary_file_downloader_html(json_data, title):
    """Build an HTML anchor that downloads ``json_data`` as a file.

    The JSON text is embedded in the link itself as a base64 ``data:`` URI,
    so nothing is written to disk.

    Args:
        json_data: The JSON document, as a string.
        title: File name placed in the anchor's ``download`` attribute.

    Returns:
        The ``<a>`` element as a string, with the link text "Download JSON".
    """
    payload = base64.b64encode(json_data.encode("utf-8")).decode("ascii")
    # The file name sits inside a double-quoted attribute; escape it so a
    # quote or angle bracket in the name cannot break out of the attribute.
    safe_title = html.escape(title, quote=True)
    return (
        f'<a href="data:application/json;base64,{payload}" '
        f'download="{safe_title}">Download JSON</a>'
    )

# Runs when the user presses the button: fetch the page, pull out the quote
# texts, show them as JSON and offer them as a download.
if st.button("Scrape Data"):
    if not url:
        st.error("Please enter a valid URL.")
    else:
        try:
            # Send an HTTP GET request to the URL. The timeout keeps the app
            # from waiting indefinitely on a host that never answers.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            # Malformed URLs (e.g. no scheme), connection failures and
            # timeouts are reported in the page instead of crashing the app.
            st.error(f"Failed to retrieve the web page: {exc}")
        else:
            if response.status_code == 200:
                # Parse the HTML content of the page
                soup = BeautifulSoup(response.text, "html.parser")

                # Extract data from the parsed HTML (e.g., quotes):
                # every <span class="text"> element.
                quotes = soup.find_all("span", class_="text")

                # Keep only the text content of each matched element
                extracted_data = [quote.get_text() for quote in quotes]

                # Serialize the extracted strings as a JSON array
                json_data = json.dumps(extracted_data, indent=4)

                # Show the JSON and provide a link to download it
                st.markdown("### Extracted Data (JSON):")
                st.text(json_data)
                st.markdown("### Download JSON")
                st.markdown(
                    get_binary_file_downloader_html(json_data, "extracted_data.json"),
                    unsafe_allow_html=True,
                )
            else:
                # st.error accepts a single message, so the status code is
                # formatted into the string rather than passed separately.
                st.error(
                    f"Failed to retrieve the web page. Status code: {response.status_code}"
                )