Spaces:

blazingbunny
/

Entity-Attribute-Extractor-scrapegraph

Runtime error

App Files Files Community

blazingbunny committed on May 10, 2024

Commit

d111a26

verified ·

1 Parent(s): bb352cf

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -26

app.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import nest_asyncio
 import json
 from scrapegraphai.graphs import SearchGraph
-import streamlit as st  # This import is necessary to access secrets in Streamlit
 # Apply necessary settings for asyncio compatibility in Jupyter/Streamlit environments
 nest_asyncio.apply()
@@ -9,28 +11,51 @@ nest_asyncio.apply()
 # Access your API keys securely
 OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
-# Define the configuration for the graph
-graph_config = {
-    "llm": {
-        "api_key": OPENAI_API_KEY,  # Use the secure variable to pass the actual API key
-        "model": "gpt-3.5-turbo",
-        "temperature": 0,
-    },
-}
-# Create the SearchGraph instance with a prompt
-search_graph = SearchGraph(
-    prompt="List me all the attributes of 'cannabis strain'.",
-    config=graph_config
-)
-try:
-    # Run the graph to fetch results
-    result = search_graph.run()
-    # Convert the result to a JSON string with indentation for better readability
-    output = json.dumps(result, indent=2)
-    # Print each line of the JSON output
-    for line in output.split("\n"):
-        print(line)
-except Exception as e:
-    print(f"An error occurred: {e}")

 import nest_asyncio
 import json
+import streamlit as st  # This import is necessary to access secrets and for the web interface
+from selenium import webdriver
+from chromedriver_binary import add_chromedriver_to_path
 from scrapegraphai.graphs import SearchGraph
 # Apply necessary settings for asyncio compatibility in Jupyter/Streamlit environments
 nest_asyncio.apply()
 # Access your API keys securely
 OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
+# Selenium setup function
+def setup_selenium():
+    """Create and return a headless Chrome WebDriver.
+
+    Calls ``add_chromedriver_to_path()`` and then starts Chrome with the
+    ``--headless``, ``--no-sandbox`` and ``--disable-dev-shm-usage``
+    arguments. This function does not close the driver; whoever receives
+    it is responsible for calling ``quit()``.
+    """
+    add_chromedriver_to_path()
+    chrome_options = webdriver.ChromeOptions()
+    # Arguments are applied in this exact order.
+    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
+        chrome_options.add_argument(flag)
+    return webdriver.Chrome(options=chrome_options)
+def get_web_page_title(url):
+    """Return the title of the page at *url*, loaded in a fresh browser.
+
+    A new driver is obtained from ``setup_selenium()`` for every call and
+    is always shut down before this function returns or raises.
+
+    Args:
+        url: Address handed unchanged to ``driver.get``.
+
+    Returns:
+        The value of ``driver.title`` after the page has been loaded.
+
+    Raises:
+        Whatever ``driver.get`` / ``driver.title`` raise; the exception
+        propagates to the caller after the browser has been closed.
+    """
+    driver = setup_selenium()
+    try:
+        driver.get(url)
+        return driver.title
+    finally:
+        # Quit even when navigation fails, otherwise each failed request
+        # leaves a headless Chrome process running.
+        driver.quit()
+st.title("Hybrid AI and Web Scraping Application")
+# --- Page-title scraper ---------------------------------------------------
+url = st.text_input("Enter the URL to scrape for title")
+if st.button("Scrape for Title"):
+    if not url.strip():
+        # Nothing to navigate to; avoid starting a browser for an empty URL.
+        st.warning("Please enter a URL first.")
+    else:
+        try:
+            title = get_web_page_title(url.strip())
+        except Exception as e:
+            # Top-level UI boundary: show the failure in the page instead of
+            # a raw traceback, mirroring the AI section below.
+            st.error(f"An error occurred: {e}")
+        else:
+            st.write(f"The title of the page is: {title}")
+# --- AI query -------------------------------------------------------------
+query_prompt = st.text_input("Enter your AI query", value="List me all the attributes of 'cannabis strain'.")
+if st.button("Fetch Data from AI"):
+    if not query_prompt.strip():
+        # Skip the request entirely when there is no prompt to send.
+        st.warning("Please enter a query first.")
+    else:
+        # Configuration for the graph; the prompt comes from the text input.
+        graph_config = {
+            "llm": {
+                "api_key": OPENAI_API_KEY,
+                "model": "gpt-3.5-turbo",
+                "temperature": 0,
+            },
+        }
+        try:
+            # Build and run inside the try so that construction errors are
+            # reported the same way as run-time errors.
+            search_graph = SearchGraph(prompt=query_prompt, config=graph_config)
+            result = search_graph.run()
+            # Pretty-print the result as indented JSON.
+            output = json.dumps(result, indent=2)
+            # Show the whole JSON document in a single text area.
+            st.text_area("Result", value=output, height=300)
+        except Exception as e:
+            st.error(f"An error occurred: {e}")