gouravgujariya committed on
Commit
271db44
·
1 Parent(s): 647110d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py CHANGED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import base64
import html
import json

import requests
import streamlit as st
from bs4 import BeautifulSoup
5
+
# Upper bound (seconds) on how long a single page fetch may block the app.
REQUEST_TIMEOUT_SECONDS = 10


# Function to create a download link for a JSON file
def get_binary_file_downloader_html(json_data, title):
    """Return an HTML anchor that downloads ``json_data`` as a file.

    The text is embedded in the link itself as a base64 ``data:`` URI, so no
    file is written on the server.

    Args:
        json_data: The JSON document, as text.
        title: File name placed in the anchor's ``download`` attribute.

    Returns:
        An ``<a>`` element, as a string, labelled "Download JSON".
    """
    b64 = base64.b64encode(json_data.encode("utf-8")).decode("ascii")
    # Escape the file name so a quote in it cannot break out of the attribute.
    safe_title = html.escape(title, quote=True)
    return (
        f'<a href="data:application/json;base64,{b64}" '
        f'download="{safe_title}">Download JSON</a>'
    )


def main():
    """Render the scraper page and handle the "Scrape Data" button.

    Fetches the URL entered by the user, collects the text of every
    ``<span class="text">`` element, and shows the result as JSON together
    with a download link. Failures are reported through ``st.error``.
    """
    st.title("Web Scraper with Streamlit")

    # User input for the URL
    url = st.text_input("Enter the URL of the website you want to scrape:")

    if not st.button("Scrape Data"):
        return

    if not url or not url.strip():
        st.error("Please enter a valid URL.")
        return

    # Send an HTTP GET request to the URL. Malformed URLs, DNS failures and
    # timeouts all surface as RequestException; report them instead of
    # letting the app crash with a traceback.
    try:
        response = requests.get(url.strip(), timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        st.error(f"Failed to retrieve the web page: {exc}")
        return

    if response.status_code != 200:
        # st.error takes a single message body, so format the code into it.
        st.error(
            f"Failed to retrieve the web page. Status code: {response.status_code}"
        )
        return

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract data from the parsed HTML (e.g., quotes)
    quotes = soup.find_all("span", class_="text")
    extracted_data = [quote.get_text() for quote in quotes]

    # Serialize the extracted quotes as JSON text
    json_data = json.dumps(extracted_data, indent=4)

    # Show the JSON and provide a link to download it
    st.markdown("### Extracted Data (JSON):")
    st.text(json_data)
    st.markdown("### Download JSON")
    st.markdown(
        get_binary_file_downloader_html(json_data, "extracted_data.json"),
        unsafe_allow_html=True,
    )


# Streamlit executes the script as "__main__". Calling main() here, after all
# definitions, guarantees the helper above exists before the button handler
# uses it.
if __name__ == "__main__":
    main()