# product-demo / app.py
# alpcansoydas's picture
# Update app.py
# aa6b072 verified
import gradio as gr
import requests
from duckduckgo_search import DDGS
import itertools
import time
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
from langdetect import detect
# Fetch proxy list from GitHub
def get_proxies():
    """Download a SOCKS4 proxy list (one host:port per line) from GitHub.

    Returns:
        list[str]: non-empty proxy address strings.

    Raises:
        requests.HTTPError: if the download fails.
        requests.RequestException: on network/timeout errors.
    """
    url = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"
    # Bound the request so module import cannot hang forever, and fail loudly
    # instead of silently treating an HTTP error page as a proxy list.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    # Drop blank lines that would otherwise become empty proxy entries.
    return [line.strip() for line in response.text.splitlines() if line.strip()]
# Proxy cycle for rotation
# NOTE(review): network call at import time — module import fails if GitHub is
# unreachable; consider lazy initialization.
proxy_list = get_proxies()
proxy_cycle = itertools.cycle(proxy_list)  # endless round-robin over fetched proxies
# Proxy-enabled DDGS
class ProxiedDDGS(DDGS):
    """DDGS subclass that routes its HTTP GETs through a single proxy.

    NOTE(review): this overrides the private ``_get`` hook; newer
    duckduckgo_search releases accept a proxy argument in ``DDGS.__init__``
    directly and may never call ``_get`` — confirm against the pinned version.
    """

    def __init__(self, proxy):
        super().__init__()
        # Proxy address string from the SOCKS4 list, e.g. "1.2.3.4:1080".
        self.proxy = proxy

    def _get(self, url, headers=None):
        """Fetch *url* via this instance's proxy; raise on HTTP errors.

        The timeout bounds each attempt so the caller's retry loop can
        rotate to the next proxy instead of hanging indefinitely.
        """
        response = requests.get(
            url,
            headers=headers,
            proxies={"http": self.proxy, "https": self.proxy},
            timeout=10,
        )
        response.raise_for_status()
        return response
# Search function with retries
def search_with_retries(query, max_results=3, max_retries=5, backoff_factor=1):
    """Run a DuckDuckGo text search, rotating proxies on failure.

    Args:
        query: search string.
        max_results: maximum number of results to request per attempt.
        max_retries: number of attempts before giving up.
        backoff_factor: linear backoff multiplier (sleep = factor * attempt).

    Returns:
        tuple: (results, proxy) — the result list and the proxy that worked.

    Raises:
        RuntimeError: when every attempt fails; chained to the last error.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        proxy = next(proxy_cycle)
        try:
            searcher = ProxiedDDGS(proxy)
            results = searcher.text(query, max_results=max_results)
            return results, proxy
        except Exception as exc:
            # Proxy failures are expected; remember the cause and back off
            # linearly before rotating to the next proxy.
            last_error = exc
            time.sleep(backoff_factor * attempt)
    # Chain the last failure so callers can see *why* all retries failed.
    raise RuntimeError(f"All retries failed for query: {query}") from last_error
# Initialize the LLM
# NOTE(review): temperature=0.7 together with do_sample=False is contradictory —
# greedy decoding ignores temperature; confirm which sampling mode is intended.
llm = HuggingFaceEndpoint(
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
task="text-generation",
max_new_tokens=128,  # cap on generated tokens; the JSON answers are short
temperature=0.7,
do_sample=False,
)
# Prompt template for feature extraction.
# Placeholders {TEXT} and {SEARCH_RESULTS} are filled by PromptTemplate.format;
# doubled braces {{ }} escape the literal JSON example for the template engine.
template_extract_features = '''
You are a product feature extractor bot. Your task is to determine features like Brand, Model, Type, RAM, Storage, etc., from the given product description and web search results.
Return features in JSON format with keys like Brand, Model, Type, RAM, Storage, and others.
Your response MUST only include a valid JSON object and nothing else.
Example:
{{
"Brand": "Apple",
"Model": "iPhone 14",
"Type": "Smartphone",
"RAM": "4GB",
"Storage": "128GB"
}}
Answer with JSON for the following:
Given product description and web search results: {TEXT} {SEARCH_RESULTS}
'''
# Parses the model's raw text reply into a Python dict (raises if not JSON).
json_output_parser = JsonOutputParser()
# Extract product features from a description via web search + LLM
def extract_features(description):
    """Detect language, search the web, and extract product features as JSON.

    Args:
        description: free-text product description.

    Returns:
        tuple: (ISO language code, parsed feature dict, elapsed seconds).
    """
    start = time.time()

    # langdetect raises on empty/undetectable text; default to English then.
    try:
        lang = detect(description)
    except Exception:
        lang = "en"

    # Perform web search; fall back to a placeholder so the prompt stays valid.
    try:
        search_results, _ = search_with_retries(description, max_results=3)
        # NOTE(review): duckduckgo_search result dicts usually key the snippet
        # as 'body', not 'snippet' — confirm; .get() would silently hide the
        # mismatch and feed empty strings to the prompt.
        search_text = "\n".join(res.get('snippet', '') for res in search_results)
    except RuntimeError:
        search_text = "No search results available."

    # Format the prompt
    prompt_extract = PromptTemplate(
        template=template_extract_features,
        input_variables=["TEXT", "SEARCH_RESULTS"],
    )
    formatted_prompt = prompt_extract.format(TEXT=description, SEARCH_RESULTS=search_text)

    # LLM response -> parsed JSON dict.
    response = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(response)
    return lang, parsed_output, time.time() - start
# Create the Gradio interface
def create_gradio_interface():
    """Build and launch the Gradio UI for the feature extractor (blocking)."""
    with gr.Blocks() as iface:
        text_input = gr.Textbox(label="Item Description")
        lang_output = gr.Textbox(label="Detected Language")
        feature_output = gr.Textbox(label="Extracted Features (JSON)")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Extract Features")

        def on_submit(text):
            # Bridge UI -> extractor; format elapsed time for display.
            lang, features, duration = extract_features(text)
            return lang, features, f"{duration:.2f} seconds"

        submit_btn.click(
            fn=on_submit,
            inputs=text_input,
            outputs=[lang_output, feature_output, time_taken],
        )
    # Launch after the Blocks context closes so the layout is finalized.
    iface.launch()
# Script entry point: build and serve the UI only when run directly.
if __name__ == "__main__":
    create_gradio_interface()