# product-demo / app.py
# alpcansoydas's picture
# Update app.py
# aa6b072 verified
import gradio as gr
import requests
from duckduckgo_search import DDGS
import itertools
import time
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
from langdetect import detect
# Fetch proxy list from GitHub
def get_proxies():
    """Download a SOCKS4 proxy list (one host:port per line) from GitHub.

    Returns:
        list[str]: non-empty proxy address strings.

    Raises:
        requests.HTTPError: if the download fails.
        requests.RequestException: on network/timeout errors.
    """
    url = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"
    # Bound the request so module import cannot hang forever, and fail loudly
    # instead of silently treating an HTTP error page as a proxy list.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    # Drop blank lines that would otherwise become empty proxy entries.
    return [line.strip() for line in response.text.splitlines() if line.strip()]
# Proxy cycle for rotation
# NOTE(review): network call at import time — module import fails if GitHub is
# unreachable; consider lazy initialization.
proxy_list = get_proxies()
proxy_cycle = itertools.cycle(proxy_list)  # endless round-robin over fetched proxies
# Proxy-enabled DDGS
class ProxiedDDGS(DDGS):
    """DDGS subclass that routes its HTTP GETs through a single proxy.

    NOTE(review): this overrides the private ``_get`` hook; newer
    duckduckgo_search releases accept a proxy argument in ``DDGS.__init__``
    directly and may never call ``_get`` — confirm against the pinned version.
    """

    def __init__(self, proxy):
        super().__init__()
        # Proxy address string from the SOCKS4 list, e.g. "1.2.3.4:1080".
        self.proxy = proxy

    def _get(self, url, headers=None):
        """Fetch *url* via this instance's proxy; raise on HTTP errors.

        The timeout bounds each attempt so the caller's retry loop can
        rotate to the next proxy instead of hanging indefinitely.
        """
        response = requests.get(
            url,
            headers=headers,
            proxies={"http": self.proxy, "https": self.proxy},
            timeout=10,
        )
        response.raise_for_status()
        return response
# Search function with retries
def search_with_retries(query, max_results=3, max_retries=5, backoff_factor=1):
    """Run a DuckDuckGo text search, rotating proxies on failure.

    Args:
        query: search string.
        max_results: maximum number of results to request per attempt.
        max_retries: number of attempts before giving up.
        backoff_factor: linear backoff multiplier (sleep = factor * attempt).

    Returns:
        tuple: (results, proxy) — the result list and the proxy that worked.

    Raises:
        RuntimeError: when every attempt fails; chained to the last error.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        proxy = next(proxy_cycle)
        try:
            searcher = ProxiedDDGS(proxy)
            results = searcher.text(query, max_results=max_results)
            return results, proxy
        except Exception as exc:
            # Proxy failures are expected; remember the cause and back off
            # linearly before rotating to the next proxy.
            last_error = exc
            time.sleep(backoff_factor * attempt)
    # Chain the last failure so callers can see *why* all retries failed.
    raise RuntimeError(f"All retries failed for query: {query}") from last_error
# Initialize the LLM
# NOTE(review): temperature=0.7 together with do_sample=False is contradictory —
# greedy decoding ignores temperature; confirm which sampling mode is intended.
llm = HuggingFaceEndpoint(
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
task="text-generation",
max_new_tokens=128,  # cap on generated tokens; the JSON answers are short
temperature=0.7,
do_sample=False,
)
# Prompt template for feature extraction.
# Placeholders {TEXT} and {SEARCH_RESULTS} are filled by PromptTemplate.format;
# doubled braces {{ }} escape the literal JSON example for the template engine.
template_extract_features = '''
You are a product feature extractor bot. Your task is to determine features like Brand, Model, Type, RAM, Storage, etc., from the given product description and web search results.
Return features in JSON format with keys like Brand, Model, Type, RAM, Storage, and others.
Your response MUST only include a valid JSON object and nothing else.
Example:
{{
"Brand": "Apple",
"Model": "iPhone 14",
"Type": "Smartphone",
"RAM": "4GB",
"Storage": "128GB"
}}
Answer with JSON for the following:
Given product description and web search results: {TEXT} {SEARCH_RESULTS}
'''
# Parses the model's raw text reply into a Python dict (raises if not JSON).
json_output_parser = JsonOutputParser()
# Extract product features from a description via web search + LLM
def extract_features(description):
    """Detect language, search the web, and extract product features as JSON.

    Args:
        description: free-text product description.

    Returns:
        tuple: (ISO language code, parsed feature dict, elapsed seconds).
    """
    start = time.time()

    # langdetect raises on empty/undetectable text; default to English then.
    try:
        lang = detect(description)
    except Exception:
        lang = "en"

    # Perform web search; fall back to a placeholder so the prompt stays valid.
    try:
        search_results, _ = search_with_retries(description, max_results=3)
        # NOTE(review): duckduckgo_search result dicts usually key the snippet
        # as 'body', not 'snippet' — confirm; .get() would silently hide the
        # mismatch and feed empty strings to the prompt.
        search_text = "\n".join(res.get('snippet', '') for res in search_results)
    except RuntimeError:
        search_text = "No search results available."

    # Format the prompt
    prompt_extract = PromptTemplate(
        template=template_extract_features,
        input_variables=["TEXT", "SEARCH_RESULTS"],
    )
    formatted_prompt = prompt_extract.format(TEXT=description, SEARCH_RESULTS=search_text)

    # LLM response -> parsed JSON dict.
    response = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(response)
    return lang, parsed_output, time.time() - start
# Create the Gradio interface
def create_gradio_interface():
    """Build and launch the Gradio UI for the feature extractor (blocking)."""
    with gr.Blocks() as iface:
        text_input = gr.Textbox(label="Item Description")
        lang_output = gr.Textbox(label="Detected Language")
        feature_output = gr.Textbox(label="Extracted Features (JSON)")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Extract Features")

        def on_submit(text):
            # Bridge UI -> extractor; format elapsed time for display.
            lang, features, duration = extract_features(text)
            return lang, features, f"{duration:.2f} seconds"

        submit_btn.click(
            fn=on_submit,
            inputs=text_input,
            outputs=[lang_output, feature_output, time_taken],
        )
    # Launch after the Blocks context closes so the layout is finalized.
    iface.launch()
# Script entry point: build and serve the UI only when run directly.
if __name__ == "__main__":
    create_gradio_interface()