Spaces:

blazingbunny
/

Entity-Attribute-Extractor-scrapegraph

Runtime error

App Files Files Community

Entity-Attribute-Extractor-scrapegraph / app.py

blazingbunny

Update app.py

a82d9f3 verified 12 months ago

raw

history blame

2.37 kB

	import nest_asyncio
	import json
	import streamlit as st # This import is necessary to access secrets and for the web interface
	from selenium import webdriver
	from chromedriver_binary import add_chromedriver_to_path
	from scrapegraphai.graphs import SearchGraph
	import subprocess # Import the subprocess module

	# Apply necessary settings for asyncio compatibility in Jupyter/Streamlit
	# environments. A single call is sufficient; the earlier duplicate call
	# further down was redundant and has been folded into this one.
	nest_asyncio.apply()

	# Attempt to install Playwright browsers.
	# Failure is reported in the UI rather than aborting the script, so the
	# rest of the page can still render.
	try:
	    subprocess.run(["playwright", "install"], check=True)
	except (subprocess.CalledProcessError, OSError) as e:
	    # CalledProcessError: the installer started but exited non-zero.
	    # OSError (e.g. FileNotFoundError): the `playwright` executable could
	    # not be launched at all; previously this escaped and crashed the app.
	    st.error("Failed to install Playwright: {}".format(e))

	# Access your API keys securely
	OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]

	def setup_selenium():
	    """Create and return a Chrome WebDriver configured via command-line flags.

	    The chromedriver binary is put on PATH first, then Chrome is started
	    with the headless, no-sandbox and disable-dev-shm-usage flags.

	    Returns:
	        The ``webdriver.Chrome`` instance; the caller is responsible for
	        calling ``quit()`` on it.
	    """
	    add_chromedriver_to_path()

	    chrome_options = webdriver.ChromeOptions()
	    # Flags are added in the same order as before.
	    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
	        chrome_options.add_argument(flag)

	    return webdriver.Chrome(options=chrome_options)

	def get_web_page_title(url):
	    """Load ``url`` in a fresh browser session and return the page title.

	    Args:
	        url: Address passed unchanged to ``driver.get``.

	    Returns:
	        The value of ``driver.title`` after the page has been loaded.

	    Raises:
	        Whatever ``setup_selenium()`` or ``driver.get`` raise; the exception
	        propagates to the caller after the browser has been shut down.
	    """
	    driver = setup_selenium()
	    try:
	        driver.get(url)
	        return driver.title
	    finally:
	        # Always release the browser, even when navigation fails; otherwise
	        # each failed request leaves a Chrome process behind.
	        driver.quit()

	# ---- Page layout: title scraping -------------------------------------
	st.title("Hybrid AI and Web Scraping Application")
	url = st.text_input("Enter the URL to scrape for title")

	if st.button("Scrape for Title"):
	    if not url or not url.strip():
	        # Nothing to load yet; do not hand an empty URL to the browser.
	        st.warning("Please enter a URL first.")
	    else:
	        try:
	            title = get_web_page_title(url)
	        except Exception as e:
	            # UI boundary: report the failure the same way the AI section
	            # below does instead of letting the script abort.
	            st.error(f"An error occurred: {e}")
	        else:
	            st.write(f"The title of the page is: {title}")

	# ---- Page layout: AI query -------------------------------------------
	query_prompt = st.text_input("Enter your AI query", value="List me all the attributes of 'cannabis strain'.")

	if st.button("Fetch Data from AI"):
	    # Define the configuration for the graph based on user input
	    # NOTE(review): some scrapegraphai releases appear to expect a
	    # provider-prefixed model name (e.g. "openai/gpt-3.5-turbo") - confirm
	    # against the version pinned for this app.
	    graph_config = {
	        "llm": {
	            "api_key": OPENAI_API_KEY,
	            "model": "gpt-3.5-turbo",
	            "temperature": 0,
	        },
	    }

	    try:
	        # Create the SearchGraph instance dynamically. Construction is
	        # inside the try so configuration errors are reported too.
	        search_graph = SearchGraph(prompt=query_prompt, config=graph_config)
	        # Run the graph to fetch results
	        result = search_graph.run()
	        # Convert the result to a JSON string with indentation for better readability
	        output = json.dumps(result, indent=2)
	        # Show the formatted JSON in a scrollable text area
	        st.text_area("Result", value=output, height=300)
	    except Exception as e:
	        st.error(f"An error occurred: {e}")