import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
import gradio as gr
import subprocess
import json

# Ensure Playwright installs required browsers and dependencies
subprocess.run(["playwright", "install"])
#subprocess.run(["playwright", "install-deps"])

# Load environment variables
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')

# Initialize the model instances
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

llm_model_instance = HuggingFaceEndpoint(
    repo_id=repo_id,
    max_length=128,
    temperature=0.5,
    token=HUGGINGFACEHUB_API_TOKEN
)

embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN,
    model_name="sentence-transformers/all-MiniLM-l6-v2"
)

graph_config = {
    "llm": {"model_instance": llm_model_instance},
    "embeddings": {"model_instance": embedder_model_instance}
}

def scrape_and_summarize(prompt, source):
    smart_scraper_graph = SmartScraperGraph(
        prompt=prompt,
        source=source,
        config=graph_config
    )
    result = smart_scraper_graph.run()
    exec_info = smart_scraper_graph.get_execution_info()

    # Ensure the result is properly formatted as JSON
    if isinstance(result, dict):
        result_json = result
    else:
        try:
            result_json = json.loads(result)
        except json.JSONDecodeError as e:
            # Attempt to extract JSON from the result
            start_index = result.find("[")
            end_index = result.rfind("]")
            if start_index != -1 and end_index != -1:
                json_str = result[start_index:end_index + 1]
                try:
                    result_json = json.loads(json_str)
                except json.JSONDecodeError as inner_e:
                    raise ValueError(f"Invalid JSON output: {result}") from inner_e
            else:
                raise ValueError(f"Invalid JSON output: {result}") from e

    return result_json, prettify_exec_info(exec_info)

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("