Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,10 +6,8 @@ from langchain_community.llms import HuggingFaceEndpoint
|
|
6 |
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
|
7 |
import gradio as gr
|
8 |
import subprocess
|
9 |
-
import
|
10 |
-
from langchain_community.vectorstores.redis import RedisVectorStore
|
11 |
|
12 |
-
#Using Mistral Modal
|
13 |
# Ensure Playwright installs required browsers and dependencies
|
14 |
subprocess.run(["playwright", "install"])
|
15 |
#subprocess.run(["playwright", "install-deps"])
|
@@ -23,22 +21,16 @@ repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
|
|
23 |
llm_model_instance = HuggingFaceEndpoint(
|
24 |
repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
|
25 |
)
|
26 |
-
|
27 |
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
|
28 |
api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
|
29 |
)
|
30 |
|
31 |
-
|
32 |
-
r = redis.Redis(host="localhost", port=6379)
|
33 |
-
vector_store = RedisVectorStore(redis=r)
|
34 |
-
|
35 |
graph_config = {
|
36 |
"llm": {"model_instance": llm_model_instance},
|
37 |
-
"embeddings": {"model_instance": embedder_model_instance}
|
38 |
-
"vector_store": {"model_instance": vector_store}
|
39 |
}
|
40 |
|
41 |
-
#To Scrape the data and summarize it
|
42 |
def scrape_and_summarize(prompt, source):
|
43 |
smart_scraper_graph = SmartScraperGraph(
|
44 |
prompt=prompt,
|
@@ -47,24 +39,45 @@ def scrape_and_summarize(prompt, source):
|
|
47 |
)
|
48 |
result = smart_scraper_graph.run()
|
49 |
exec_info = smart_scraper_graph.get_execution_info()
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
|
|
|
|
|
53 |
with gr.Blocks() as demo:
|
54 |
-
gr.Markdown("
|
55 |
-
gr.Markdown("""
|
56 |
-
|
|
|
|
|
57 |
with gr.Row():
|
58 |
with gr.Column():
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
scrape_button = gr.Button("Scrape the data")
|
64 |
|
65 |
with gr.Column():
|
66 |
result_output = gr.JSON(label="Result")
|
67 |
-
exec_info_output = gr.Textbox(label="
|
68 |
|
69 |
scrape_button.click(
|
70 |
scrape_and_summarize,
|
|
|
6 |
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
|
7 |
import gradio as gr
|
8 |
import subprocess
|
9 |
+
import json
|
|
|
10 |
|
|
|
11 |
# Ensure Playwright installs required browsers and dependencies
|
12 |
subprocess.run(["playwright", "install"])
|
13 |
#subprocess.run(["playwright", "install-deps"])
|
|
|
21 |
llm_model_instance = HuggingFaceEndpoint(
|
22 |
repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
|
23 |
)
|
24 |
+
|
25 |
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
|
26 |
api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
|
27 |
)
|
28 |
|
|
|
|
|
|
|
|
|
29 |
graph_config = {
|
30 |
"llm": {"model_instance": llm_model_instance},
|
31 |
+
"embeddings": {"model_instance": embedder_model_instance}
|
|
|
32 |
}
|
33 |
|
|
|
34 |
def scrape_and_summarize(prompt, source):
|
35 |
smart_scraper_graph = SmartScraperGraph(
|
36 |
prompt=prompt,
|
|
|
39 |
)
|
40 |
result = smart_scraper_graph.run()
|
41 |
exec_info = smart_scraper_graph.get_execution_info()
|
42 |
+
|
43 |
+
# Ensure the result is properly formatted as JSON
|
44 |
+
if isinstance(result, dict):
|
45 |
+
result_json = result
|
46 |
+
else:
|
47 |
+
try:
|
48 |
+
result_json = json.loads(result)
|
49 |
+
except json.JSONDecodeError as e:
|
50 |
+
# Attempt to extract JSON from the result
|
51 |
+
start_index = result.find("[")
|
52 |
+
end_index = result.rfind("]")
|
53 |
+
if start_index != -1 and end_index != -1:
|
54 |
+
json_str = result[start_index:end_index+1]
|
55 |
+
try:
|
56 |
+
result_json = json.loads(json_str)
|
57 |
+
except json.JSONDecodeError as inner_e:
|
58 |
+
raise ValueError(f"Invalid JSON output: {result}") from inner_e
|
59 |
+
else:
|
60 |
+
raise ValueError(f"Invalid JSON output: {result}") from e
|
61 |
|
62 |
+
return result_json, prettify_exec_info(exec_info)
|
63 |
+
|
64 |
+
# Gradio interface
|
65 |
with gr.Blocks() as demo:
|
66 |
+
gr.Markdown("# Scrape websites, no-code version")
|
67 |
+
gr.Markdown("""Easily scrape and summarize web content using advanced AI models on the Hugging Face Hub without writing any code. Input your desired prompt and source URL to get started.
|
68 |
+
This is a no-code version of the excellent lib [ScrapeGraphAI](https://github.com/VinciGit00/Scrapegraph-ai).
|
69 |
+
It's a basic demo and a work in progress. Please contribute to it to make it more useful!""")
|
70 |
+
|
71 |
with gr.Row():
|
72 |
with gr.Column():
|
73 |
+
model_dropdown = gr.Textbox(label="Model", value="Mistral-7B-Instruct-v0.2")
|
74 |
+
prompt_input = gr.Textbox(label="Prompt", value="List me all the press releases with their headlines and urls.")
|
75 |
+
source_input = gr.Textbox(label="Source URL", value="https://www.whitehouse.gov/")
|
76 |
+
scrape_button = gr.Button("Scrape and Summarize")
|
|
|
77 |
|
78 |
with gr.Column():
|
79 |
result_output = gr.JSON(label="Result")
|
80 |
+
exec_info_output = gr.Textbox(label="Execution Info")
|
81 |
|
82 |
scrape_button.click(
|
83 |
scrape_and_summarize,
|