Spaces:

Arhashmi
/

scraping_project

Runtime error

Arhashmi committed on Jan 8, 2024

Commit

fc2385b

1 Parent(s): efc3d79

Upload 4 files

Files changed (4) hide show

app.py ADDED Viewed

+import streamlit as st
+from scrape import scrape_data_from_url
+import response
# Default values only. Streamlit re-executes this script on every widget
# interaction, so module-level variables cannot carry state between runs;
# the names are kept so anything referencing them still resolves.
data = "No data"
url = ""


def main():
    """Render the scraping-and-chat page.

    The page has two actions: "Scrape Data" fetches the paragraph text of
    the entered URL, and "Send" asks ``response.get_response`` a question
    about that text. The scraped text is stored in ``st.session_state`` so
    it is still available on the later rerun triggered by "Send".
    """
    st.title("Web Scraping and Chat App")

    # Seed the per-session store once; later reruns keep whatever was scraped.
    if "data" not in st.session_state:
        st.session_state["data"] = data

    target_url = st.text_input("Enter URL:")
    if st.button("Scrape Data"):
        scraped = scrape_data_from_url(target_url)
        st.session_state["data"] = scraped
        # scrape_data_from_url reports failures as a string with this prefix
        # instead of raising, so check for it before claiming success.
        if scraped.startswith("Error fetching data:"):
            st.error("Scraping failed.")
        else:
            st.success("Data scraped successfully!")

    st.subheader("Scraped Data:")
    st.write(st.session_state["data"])

    user_input = st.text_input("Enter your message:")
    if st.button("Send"):
        bot_response = response.get_response(
            user_input, st.session_state["data"]
        )
        st.success("Bot Response:")
        st.write(bot_response)


if __name__ == '__main__':
    main()

requirements.txt ADDED Viewed

+beautifulsoup4==4.12.2
+streamlit
+blinker==1.6.2
+certifi==2023.7.22
+charset-normalizer==3.2.0
+click==8.1.6
+colorama==0.4.6
+Flask==2.3.2
+idna==3.4
+itsdangerous==2.1.2
+Jinja2==3.1.2
+MarkupSafe==2.1.3
+requests==2.31.0
+soupsieve==2.4.1
+urllib3==2.0.4
+Werkzeug==2.3.6

response.py ADDED Viewed

def get_response(user_input: str, scraped_data: str) -> str:
    """Return a canned answer to *user_input* based on *scraped_data*.

    Matching is case-insensitive substring search: the single known answer
    is returned when the question mentions "birthplace" and the scraped text
    mentions "imran khan". Anything else gets the fallback message.

    Args:
        user_input: The user's question. ``None`` is treated as empty.
        scraped_data: Text previously scraped from a page. ``None`` is
            treated as empty.

    Returns:
        The answer string, or an apology when no rule matches.
    """
    # Treat missing input as empty text rather than failing on ``.lower()``.
    question = (user_input or "").lower()
    page_text = (scraped_data or "").lower()

    if "birthplace" in question and "imran khan" in page_text:
        return "Imran Khan was born in Lahore, Pakistan."
    return "I'm sorry, I couldn't find an answer based on the provided data."

scrape.py ADDED Viewed

+import requests
+from bs4 import BeautifulSoup
def scrape_data_from_url(url, timeout=10):
    """Fetch *url* and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        The paragraph texts joined with newlines, or a string starting with
        ``"Error fetching data: "`` when the request fails (bad URL,
        connection problem, timeout, or non-success HTTP status).
    """
    try:
        # requests has no default timeout; without one an unresponsive
        # server would block the caller indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx statuses into exceptions
    except requests.exceptions.RequestException as e:
        return f"Error fetching data: {e}"

    soup = BeautifulSoup(response.text, 'html.parser')
    # Combine the text of every paragraph into a single string.
    return "\n".join(paragraph.get_text() for paragraph in soup.find_all('p'))