"""Hybrid Streamlit app: Selenium-based page-title scraping plus a ScrapeGraphAI query UI.

Setup note: the original source contained the bare shell command `playwright install`
pasted into the Python file, which is a syntax error. Run it in your shell instead:

    $ playwright install
"""

import json

import nest_asyncio
import streamlit as st  # Needed both for the web UI and to access st.secrets
from chromedriver_binary import add_chromedriver_to_path
from scrapegraphai.graphs import SearchGraph
from selenium import webdriver

# Patch the running event loop so asyncio-based libraries (ScrapeGraphAI) work
# inside Streamlit's already-running loop.
nest_asyncio.apply()

# Access the OpenAI API key securely via Streamlit's secrets store
# (raises KeyError at startup if the secret is not configured).
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]


def setup_selenium():
    """Create and return a headless Chrome WebDriver.

    Returns:
        selenium.webdriver.Chrome: a ready-to-use headless driver. The caller
        is responsible for calling ``driver.quit()``.
    """
    # Put the bundled chromedriver binary on PATH so Selenium can find it.
    add_chromedriver_to_path()
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    # --no-sandbox / --disable-dev-shm-usage: common flags for running Chrome
    # inside containers or CI environments with restricted privileges/shm.
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=options)
    return driver


def get_web_page_title(url):
    """Fetch *url* with headless Chrome and return the page's <title> text.

    Args:
        url: the absolute URL to load.

    Returns:
        str: the document title reported by the browser.

    Raises:
        selenium.common.exceptions.WebDriverException: if the page cannot
        be loaded. The driver is always quit, even on failure (the original
        code leaked the Chrome process when ``driver.get`` raised).
    """
    driver = setup_selenium()
    try:
        driver.get(url)
        return driver.title
    finally:
        driver.quit()


st.title("Hybrid AI and Web Scraping Application")

url = st.text_input("Enter the URL to scrape for title")
if st.button("Scrape for Title"):
    # Guard against a click with an empty input box.
    if not url:
        st.error("Please enter a URL first.")
    else:
        title = get_web_page_title(url)
        st.write(f"The title of the page is: {title}")

query_prompt = st.text_input(
    "Enter your AI query",
    value="List me all the attributes of 'cannabis strain'.",
)
if st.button("Fetch Data from AI"):
    # Define the configuration for the graph based on user input.
    graph_config = {
        "llm": {
            "api_key": OPENAI_API_KEY,
            "model": "gpt-3.5-turbo",
            "temperature": 0,
        },
    }
    # Create the SearchGraph instance dynamically from the user's prompt.
    search_graph = SearchGraph(prompt=query_prompt, config=graph_config)
    try:
        # Run the graph to fetch results.
        result = search_graph.run()
        # Pretty-print the result as indented JSON for readability.
        output = json.dumps(result, indent=2)
        st.text_area("Result", value=output, height=300)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user rather
        # than crashing the Streamlit script run.
        st.error(f"An error occurred: {e}")