"""Gradio web app that scrapes a URL and summarizes it with a Mistral model.

Uses ScrapeGraphAI's SmartScraperGraph driven by a HuggingFace-hosted
Mistral-7B-Instruct LLM and a sentence-transformers embedder.
"""

import os
import subprocess

import gradio as gr
import redis
from dotenv import load_dotenv
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores.redis import RedisVectorStore
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# Using Mistral model.
# Ensure Playwright installs required browsers and dependencies.
# List-form argv keeps shell=False (no shell-injection surface).
subprocess.run(["playwright", "install"])
# subprocess.run(["playwright", "install-deps"])

# Load environment variables
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
if not HUGGINGFACEHUB_API_TOKEN:
    # Fail fast with a clear message instead of an opaque HTTP 401 later.
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; add it to your .env file."
    )

# Initialize the model instances (Mistral-7B-Instruct via the HF Inference API).
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
llm_model_instance = HuggingFaceEndpoint(
    repo_id=repo_id,
    # NOTE(review): newer langchain releases prefer `max_new_tokens` and
    # `huggingfacehub_api_token` — confirm against the installed version.
    max_length=128,
    temperature=0.5,
    token=HUGGINGFACEHUB_API_TOKEN,
)

# Calling Sentence Transformer embedder through the HF Inference API.
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

# NOTE(review): a Redis vector store normally also needs an embedding and an
# index configuration; `redis=r` may not be a valid constructor argument —
# verify against the installed langchain_community version.
r = redis.Redis(host="localhost", port=6379)
vector_store = RedisVectorStore(redis=r)

# BUG FIX: the original had a stray extra "}" after this dict (SyntaxError).
graph_config = {
    "llm": {"model_instance": llm_model_instance},
    "embeddings": {"model_instance": embedder_model_instance},
    "vector_store": {"model_instance": vector_store},
}


def scrape_and_summarize(prompt, source):
    """Scrape *source* guided by *prompt* and summarize the result.

    Args:
        prompt: Natural-language instruction for what to extract.
        source: URL of the page to scrape.

    Returns:
        tuple: (scrape result, prettified execution-info string).
    """
    smart_scraper_graph = SmartScraperGraph(
        prompt=prompt,
        source=source,
        config=graph_config,
    )
    result = smart_scraper_graph.run()
    exec_info = smart_scraper_graph.get_execution_info()
    return result, prettify_exec_info(exec_info)


# Gradio user interface
with gr.Blocks() as demo:
    gr.Markdown("A project on WEB-SCRAPING using Mistral model")
    gr.Markdown("""Effortlessly extract and condense web content using cutting-edge AI models from the Hugging Face Hub—no coding required! Simply provide your desired prompt and source URL to begin. 
This no-code solution is inspired by the impressive library ScrapeGraphAI, and while it’s currently a basic demo, we encourage contributions to enhance its utility!""")
    # (https://github.com/VinciGit00/Scrapegraph-ai) is suggested by the tutor
    with gr.Row():
        with gr.Column():
            model_dropdown = gr.Textbox(
                label="Model",
                value="Mistral-7B-Instruct-v0.2, As all-MiniLM-l6-v2",
            )
            prompt_input = gr.Textbox(
                label="Prompt",
                value="List me all the doctors name and their timing",
            )
            source_input = gr.Textbox(
                label="Source URL",
                value="https://www.yelp.com/search?find_desc=dentist&find_loc=San+Francisco%2C+CA",
            )
            scrape_button = gr.Button("Scrape the data")
        with gr.Column():
            result_output = gr.JSON(label="Result")
            exec_info_output = gr.Textbox(label="Output Info")

    scrape_button.click(
        scrape_and_summarize,
        inputs=[prompt_input, source_input],
        outputs=[result_output, exec_info_output],
    )

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()