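"""Streamlit app that combines Selenium-based page-title scraping with ScrapeGraphAI's SearchGraph."""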
import nest_asyncio
import json
import streamlit as st  # This import is necessary to access secrets and for the web interface
from selenium import webdriver
from chromedriver_binary import add_chromedriver_to_path
from scrapegraphai.graphs import SearchGraph
import subprocess  # Used to run "playwright install" at startup

# Apply necessary settings for asyncio compatibility in Jupyter/Streamlit environments
nest_asyncio.apply()

# Install the Playwright browsers that scrapegraphai uses to fetch pages
try:
    subprocess.run(["playwright", "install"], check=True)
except (subprocess.CalledProcessError, FileNotFoundError) as e:
    st.error(f"Failed to install Playwright browsers: {e}")

# Access your API keys securely
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]

def setup_selenium():
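    """Build a headless Chrome WebDriver; the --no-sandbox/--disable-dev-shm-usage flags keep it working in containers."""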
    add_chromedriver_to_path()
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=options)
    return driver

def get_web_page_title(url):
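    """Open the URL in headless Chrome, read the page <title>, and always close the browser."""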
    driver = setup_selenium()
    try:
        driver.get(url)
        return driver.title
    finally:
        # Close the browser even if navigation fails
        driver.quit()

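# Streamlit UI: scrape a page title on demand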
st.title("Hybrid AI and Web Scraping Application")
url = st.text_input("Enter the URL to scrape for title")

if st.button("Scrape for Title"):
    if not url:
        st.warning("Please enter a URL first.")
    else:
        title = get_web_page_title(url)
        st.write(f"The title of the page is: {title}")

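# AI query: ScrapeGraphAI's SearchGraph searches the web and extracts an answer for the prompt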
query_prompt = st.text_input("Enter your AI query", value="List me all the attributes of 'cannabis strain'.")

if st.button("Fetch Data from AI"):
    # Configure the LLM used by the SearchGraph; the prompt comes from the text box above
    graph_config = {
        "llm": {
            "api_key": OPENAI_API_KEY,
            "model": "gpt-3.5-turbo",
            "temperature": 0,
        },
    }

    # Create the SearchGraph instance dynamically
    search_graph = SearchGraph(prompt=query_prompt, config=graph_config)

    try:
        # Run the graph to fetch results
        result = search_graph.run()
        # Convert the result to a JSON string with indentation for better readability
        output = json.dumps(result, indent=2)
        # Show the formatted JSON in a scrollable text area
        st.text_area("Result", value=output, height=300)
    except Exception as e:
        st.error(f"An error occurred: {e}")