import os
import subprocess

import gradio as gr
import redis
from dotenv import load_dotenv
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores.redis import Redis as RedisVectorStore
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# Using the Mistral model served through the Hugging Face Inference API
# Ensure Playwright installs the browsers required for page rendering
subprocess.run(["playwright", "install"], check=True)
# subprocess.run(["playwright", "install-deps"])  # system dependencies; needs root, left disabled
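# If startup time or image size matters, installing a single browser is usually enough
# for Playwright-based fetching (assumption: Chromium suffices for the target pages):
# subprocess.run(["playwright", "install", "chromium"], check=True)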
# Load environment variables
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
# Initialize the LLM: Mistral-7B-Instruct served via the Hugging Face Inference API
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
llm_model_instance = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
)
# Initialize the embeddings: a Sentence Transformers model, also via the Inference API
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
)
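# Optional smoke test before wiring up the graph (assumption: the token above is valid
# and both Inference API endpoints are reachable); uncomment to verify credentials:
# print(llm_model_instance.invoke("Reply with the single word: ready"))
# print(len(embedder_model_instance.embed_query("ready")))  # all-MiniLM-L6-v2 returns 384 dims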
# Local Redis client and vector store.
# Assumption: Redis Stack (with the RediSearch module) is running on localhost:6379.
r = redis.Redis(host="localhost", port=6379)
r.ping()  # fail fast if Redis is unreachable
vector_store = RedisVectorStore(
    redis_url="redis://localhost:6379",
    index_name="scraped_docs",  # example index name
    embedding=embedder_model_instance,
)
graph_config = {
    "llm": {"model_instance": llm_model_instance},
    "embeddings": {"model_instance": embedder_model_instance},
    # Note: whether the graph uses an external vector store depends on the ScrapeGraphAI version.
    "vector_store": {"model_instance": vector_store},
}
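# Common optional graph settings (assumption: supported by the installed ScrapeGraphAI
# version); useful while debugging:
# graph_config["verbose"] = True    # log each node of the scraping pipeline
# graph_config["headless"] = True   # run the Playwright browser without a window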
# Scrape the given source with SmartScraperGraph and return the result plus run info
def scrape_and_summarize(prompt, source):
    smart_scraper_graph = SmartScraperGraph(
        prompt=prompt,
        source=source,
        config=graph_config
    )
    result = smart_scraper_graph.run()
    exec_info = smart_scraper_graph.get_execution_info()
    return result, prettify_exec_info(exec_info)
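# Example of calling the scraper without the UI (assumption: network access and a valid
# HUGGINGFACEHUB_API_TOKEN are available):
# result, info = scrape_and_summarize(
#     "List the main headings on this page", "https://example.com"
# )
# print(result, info, sep="\n")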
# Gradio user interface
with gr.Blocks() as demo:
    gr.Markdown("A web-scraping project using the Mistral model")
    gr.Markdown("""Effortlessly extract and condense web content using AI models from the Hugging Face Hub, with no coding required. Simply provide a prompt and a source URL to begin. This no-code solution is inspired by the [ScrapeGraphAI](https://github.com/VinciGit00/Scrapegraph-ai) library; it is currently a basic demo, and contributions to enhance it are welcome!""")
    # ScrapeGraphAI (https://github.com/VinciGit00/Scrapegraph-ai) was suggested by the tutor
    with gr.Row():
        with gr.Column():
            model_display = gr.Textbox(
                label="Models",
                value="LLM: Mistral-7B-Instruct-v0.2 | Embeddings: all-MiniLM-L6-v2",
                interactive=False,
            )
            prompt_input = gr.Textbox(label="Prompt", value="List all the doctors' names and their timings")
            source_input = gr.Textbox(label="Source URL", value="https://www.yelp.com/search?find_desc=dentist&find_loc=San+Francisco%2C+CA")
            scrape_button = gr.Button("Scrape the data")
        with gr.Column():
            result_output = gr.JSON(label="Result")
            exec_info_output = gr.Textbox(label="Execution Info")
    scrape_button.click(
        scrape_and_summarize,
        inputs=[prompt_input, source_input],
        outputs=[result_output, exec_info_output]
    )
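# For long-running scrapes, Gradio's request queue helps avoid client timeouts
# (standard Gradio API; optional here):
# demo.queue()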
# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()
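    # To expose the app beyond localhost, standard Gradio options can be passed instead:
    # demo.launch(server_name="0.0.0.0", server_port=7860)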