IamVicky111 committed on
Commit 4c71ef0 · verified · 1 Parent(s): b7ea448

Update app.py

Files changed (1)
  1. app.py +35 -22
app.py CHANGED
@@ -6,10 +6,8 @@ from langchain_community.llms import HuggingFaceEndpoint
 from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
 import gradio as gr
 import subprocess
-import redis
-from langchain_community.vectorstores.redis import RedisVectorStore
+import json
 
-#Using Mistral Modal
 # Ensure Playwright installs required browsers and dependencies
 subprocess.run(["playwright", "install"])
 #subprocess.run(["playwright", "install-deps"])
@@ -23,22 +21,16 @@ repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
 llm_model_instance = HuggingFaceEndpoint(
     repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
 )
-#Calling Sentence Transformer
+
 embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
     api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
 )
 
-
-r = redis.Redis(host="localhost", port=6379)
-vector_store = RedisVectorStore(redis=r)
-
 graph_config = {
     "llm": {"model_instance": llm_model_instance},
-    "embeddings": {"model_instance": embedder_model_instance},
-    "vector_store": {"model_instance": vector_store}
+    "embeddings": {"model_instance": embedder_model_instance}
 }
 
-#To Scrape the data and summarize it
 def scrape_and_summarize(prompt, source):
     smart_scraper_graph = SmartScraperGraph(
         prompt=prompt,
@@ -47,24 +39,45 @@ def scrape_and_summarize(prompt, source):
     )
     result = smart_scraper_graph.run()
     exec_info = smart_scraper_graph.get_execution_info()
-    return result, prettify_exec_info(exec_info)
+
+    # Ensure the result is properly formatted as JSON
+    if isinstance(result, dict):
+        result_json = result
+    else:
+        try:
+            result_json = json.loads(result)
+        except json.JSONDecodeError as e:
+            # Attempt to extract JSON from the result
+            start_index = result.find("[")
+            end_index = result.rfind("]")
+            if start_index != -1 and end_index != -1:
+                json_str = result[start_index:end_index+1]
+                try:
+                    result_json = json.loads(json_str)
+                except json.JSONDecodeError as inner_e:
+                    raise ValueError(f"Invalid JSON output: {result}") from inner_e
+            else:
+                raise ValueError(f"Invalid JSON output: {result}") from e
 
-# Gradio User interface
+    return result_json, prettify_exec_info(exec_info)
+
+# Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("A project on WEB-SCRAPING using Mistral model")
-    gr.Markdown("""Effortlessly extract and condense web content using cutting-edge AI models from the Hugging Face Hub—no coding required! Simply provide your desired prompt and source URL to begin. This no-code solution is inspired by the impressive library ScrapeGraphAI, and while it’s currently a basic demo, we encourage contributions to enhance its utility!""")
-    #(https://github.com/VinciGit00/Scrapegraph-ai) is suggested by the tutor
+    gr.Markdown("# Scrape websites, no-code version")
+    gr.Markdown("""Easily scrape and summarize web content using advanced AI models on the Hugging Face Hub without writing any code. Input your desired prompt and source URL to get started.
+    This is a no-code version of the excellent lib [ScrapeGraphAI](https://github.com/VinciGit00/Scrapegraph-ai).
+    It's a basic demo and a work in progress. Please contribute to it to make it more useful!""")
+
     with gr.Row():
         with gr.Column():
-
-            model_dropdown = gr.Textbox(label="Model", value="Mistral-7B-Instruct-v0.2, As all-MiniLM-l6-v2")
-            prompt_input = gr.Textbox(label="Prompt", value="List me all the doctors name and their timing")
-            source_input = gr.Textbox(label="Source URL", value="https://www.yelp.com/search?find_desc=dentist&find_loc=San+Francisco%2C+CA")
-            scrape_button = gr.Button("Scrape the data")
+            model_dropdown = gr.Textbox(label="Model", value="Mistral-7B-Instruct-v0.2")
+            prompt_input = gr.Textbox(label="Prompt", value="List me all the press releases with their headlines and urls.")
+            source_input = gr.Textbox(label="Source URL", value="https://www.whitehouse.gov/")
+            scrape_button = gr.Button("Scrape and Summarize")
 
         with gr.Column():
             result_output = gr.JSON(label="Result")
-            exec_info_output = gr.Textbox(label="Output Info")
+            exec_info_output = gr.Textbox(label="Execution Info")
 
     scrape_button.click(
         scrape_and_summarize,
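
For reference, the JSON-normalization fallback this commit adds to scrape_and_summarize can be tried on its own. The sketch below lifts that logic into a standalone helper; the name normalize_result and the sample inputs are illustrative assumptions, not part of app.py.

import json

def normalize_result(result):
    # Same fallback strategy as the commit: pass a dict through unchanged,
    # otherwise parse the string as JSON, and if that fails try to salvage
    # the first [...] array embedded in the model's text output.
    if isinstance(result, dict):
        return result
    try:
        return json.loads(result)
    except json.JSONDecodeError as e:
        start_index = result.find("[")
        end_index = result.rfind("]")
        if start_index != -1 and end_index != -1:
            try:
                return json.loads(result[start_index:end_index + 1])
            except json.JSONDecodeError as inner_e:
                raise ValueError(f"Invalid JSON output: {result}") from inner_e
        raise ValueError(f"Invalid JSON output: {result}") from e

# Illustrative inputs (hypothetical model outputs):
print(normalize_result({"press_releases": []}))
print(normalize_result('[{"headline": "Example", "url": "https://example.com"}]'))
print(normalize_result('Sure! Here it is: [{"headline": "Example"}] Let me know if you need more.'))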