# Source: app.py -- commit a82d9f3 ("Update app.py") by blazingbunny, 2.37 kB
import nest_asyncio
import json
import streamlit as st # This import is necessary to access secrets and for the web interface
from selenium import webdriver
from chromedriver_binary import add_chromedriver_to_path
from scrapegraphai.graphs import SearchGraph
import subprocess # Import the subprocess module
# Apply necessary settings for asyncio compatibility in Jupyter/Streamlit
# environments. One call is enough; the original repeated it further down.
nest_asyncio.apply()

# Attempt to install the Playwright browsers. A failure is reported in the UI
# and the script carries on instead of aborting at import time.
try:
    subprocess.run(["playwright", "install"], check=True)
except (subprocess.CalledProcessError, FileNotFoundError) as e:
    # CalledProcessError: the installer ran but exited with a non-zero status.
    # FileNotFoundError: the `playwright` executable could not be found.
    st.error("Failed to install Playwright: {}".format(e))

# Read the OpenAI API key from Streamlit's secrets store so it never has to be
# written into the source file.
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
def setup_selenium():
    """Start and return a headless Chrome WebDriver.

    Returns:
        A new ``webdriver.Chrome`` instance. This function does not shut the
        driver down; the caller has to call ``quit()`` when finished.
    """
    # Register the chromedriver binary provided by chromedriver_binary.
    add_chromedriver_to_path()

    chrome_options = webdriver.ChromeOptions()
    # Same three switches, in the same order, as individual add_argument calls.
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_options.add_argument(flag)

    return webdriver.Chrome(options=chrome_options)
def get_web_page_title(url: str) -> str:
    """Return the title of the page at *url*.

    A new WebDriver is obtained from ``setup_selenium()`` on every call and is
    always shut down before this function exits, including when loading the
    page fails.

    Args:
        url: Address to load in the browser.

    Returns:
        The driver's ``title`` attribute after navigating to *url*.

    Raises:
        Whatever ``setup_selenium()``, ``driver.get()`` or ``driver.quit()``
        raise; nothing is caught here.
    """
    driver = setup_selenium()
    try:
        driver.get(url)
        return driver.title
    finally:
        # Without this, a failed navigation would skip quit() and leave the
        # browser process running.
        driver.quit()
st.title("Hybrid AI and Web Scraping Application")

# Page-title scraper: load the URL in a browser and show its title.
url = st.text_input("Enter the URL to scrape for title")
if st.button("Scrape for Title"):
    if not url.strip():
        # The text box starts empty; don't launch a browser for a blank URL.
        st.warning("Please enter a URL first.")
    else:
        try:
            title = get_web_page_title(url)
        except Exception as e:
            # UI boundary: show the failure as a message instead of letting
            # the exception escape as a raw traceback.
            st.error(f"An error occurred: {e}")
        else:
            st.write(f"The title of the page is: {title}")
# AI query: run the prompt through a SearchGraph and show the result as JSON.
query_prompt = st.text_input(
    "Enter your AI query",
    value="List me all the attributes of 'cannabis strain'.",
)
if st.button("Fetch Data from AI"):
    if not query_prompt.strip():
        # The default prompt can be cleared by the user; skip the run then.
        st.warning("Please enter a query first.")
    else:
        # Define the configuration for the graph based on user input
        graph_config = {
            "llm": {
                "api_key": OPENAI_API_KEY,
                "model": "gpt-3.5-turbo",
                "temperature": 0,
            },
        }
        try:
            # Construction sits inside the try so that a failure while
            # building the graph is reported the same way as a failed run.
            search_graph = SearchGraph(prompt=query_prompt, config=graph_config)
            # Run the graph to fetch results
            result = search_graph.run()
            # Convert the result to a JSON string with indentation for
            # better readability
            output = json.dumps(result, indent=2)
            st.text_area("Result", value=output, height=300)
        except Exception as e:
            # UI boundary: report any failure (graph construction, run, or
            # JSON serialisation) as a message in the page.
            st.error(f"An error occurred: {e}")