Spaces:
Sleeping
Sleeping
File size: 4,039 Bytes
d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 aa6b072 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 d1e081c 9aa4e56 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import itertools
import json
import time

import gradio as gr
import requests
from duckduckgo_search import DDGS
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_huggingface import HuggingFaceEndpoint
from langdetect import detect
# Fetch proxy list from GitHub
def get_proxies():
    """Download the public SOCKS4 proxy list and return its entries.

    Returns:
        list[str]: One whitespace-stripped, non-empty entry per line of the
        downloaded text file.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    response = requests.get(url, timeout=10)
    # Fail loudly instead of splitting an HTTP error page into fake "proxies".
    response.raise_for_status()
    stripped_lines = (line.strip() for line in response.text.splitlines())
    # Drop blank lines so they never end up being used as a proxy address.
    return [entry for entry in stripped_lines if entry]
# Proxy cycle for rotation.
# NOTE: get_proxies() is called at import time, so loading this module performs
# the proxy-list download. itertools.cycle then hands out the entries one after
# another and starts over from the first entry once the list is exhausted.
proxy_list = get_proxies()
proxy_cycle = itertools.cycle(proxy_list)
# Proxy-enabled DDGS
class ProxiedDDGS(DDGS):
    """DDGS subclass that stores a proxy and sends ``_get`` requests through it.

    NOTE(review): whether the parent class actually calls ``_get`` depends on
    the installed duckduckgo_search version -- confirm this override is reached.
    NOTE(review): the proxy is passed to requests exactly as given. Entries of
    the SOCKS4 list used in this file carry no scheme prefix; confirm requests
    interprets them as intended.
    """

    def __init__(self, proxy):
        """Initialise the base client and remember the proxy.

        Args:
            proxy: Proxy address used for both the "http" and "https" entries
                of the requests proxy mapping.
        """
        super().__init__()
        self.proxy = proxy

    def _get(self, url, headers=None, timeout=10):
        """Fetch ``url`` with a GET request routed through the stored proxy.

        Args:
            url: Address to fetch.
            headers: Optional request headers.
            timeout: Seconds to wait before giving up, so an unresponsive
                proxy cannot block the caller indefinitely.

        Returns:
            The ``requests`` response object.

        Raises:
            requests.RequestException: On connection failure, timeout, or an
                HTTP error status.
        """
        proxy_map = {"http": self.proxy, "https": self.proxy}
        response = requests.get(
            url, headers=headers, proxies=proxy_map, timeout=timeout
        )
        response.raise_for_status()
        return response
# Search function with retries
def search_with_retries(query, max_results=3, max_retries=5, backoff_factor=1):
    """Run a text search, switching to the next proxy after every failure.

    Args:
        query: Search query string.
        max_results: Maximum number of results requested per search.
        max_retries: Number of attempts before giving up.
        backoff_factor: Multiplier for the pause between attempts; the pause
            after the n-th failed attempt is ``backoff_factor * n`` seconds.

    Returns:
        tuple: ``(results, proxy)`` -- the search results and the proxy taken
        from the rotation for the successful attempt.

    Raises:
        RuntimeError: If every attempt fails. The last underlying error is
            attached as ``__cause__`` so the failure reason is not lost.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            proxy = next(proxy_cycle)
            searcher = ProxiedDDGS(proxy)
            results = searcher.text(query, max_results=max_results)
        except Exception as exc:
            # Any failure (dead proxy, rate limit, exhausted rotation) moves on
            # to the next proxy; keep the error for the final report.
            last_error = exc
            # No point in sleeping after the final attempt -- we raise next.
            if attempt < max_retries:
                time.sleep(backoff_factor * attempt)
        else:
            return results, proxy
    raise RuntimeError(f"All retries failed for query: {query}") from last_error
# Initialize the LLM: a HuggingFaceEndpoint client for the
# mistralai/Mistral-7B-Instruct-v0.3 repo, configured for text generation with
# at most 128 new tokens per call.
# NOTE(review): temperature=0.7 is set together with do_sample=False; whether
# the temperature has any effect without sampling depends on the serving
# backend -- confirm which of the two settings is intended.
llm = HuggingFaceEndpoint(
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
task="text-generation",
max_new_tokens=128,
temperature=0.7,
do_sample=False,
)
# Prompt template for feature extraction.
# {TEXT} and {SEARCH_RESULTS} are the two placeholders filled in by
# extract_features. The doubled braces around the example object are
# presumably there so the sample JSON survives template formatting as literal
# braces -- verify against the PromptTemplate format in use.
template_extract_features = '''
You are a product feature extractor bot. Your task is to determine features like Brand, Model, Type, RAM, Storage, etc., from the given product description and web search results.
Return features in JSON format with keys like Brand, Model, Type, RAM, Storage, and others.
Your response MUST only include a valid JSON object and nothing else.
Example:
{{
"Brand": "Apple",
"Model": "iPhone 14",
"Type": "Smartphone",
"RAM": "4GB",
"Storage": "128GB"
}}
Answer with JSON for the following:
Given product description and web search results: {TEXT} {SEARCH_RESULTS}
'''
# Parser used to turn the raw LLM reply into a Python object.
json_output_parser = JsonOutputParser()
# Define the extract_features function
def extract_features(description):
    """Detect the language of a product description and extract its features.

    The description is used as a web-search query; the text of the search
    results is passed to the LLM together with the description, and the LLM
    reply is parsed as JSON.

    Args:
        description: Free-text product description.

    Returns:
        tuple: ``(lang, parsed_output, elapsed_seconds)`` -- the detected
        language code ("en" when detection fails), the parsed LLM output, and
        the wall-clock duration of the call in seconds.

    Raises:
        Exception: Errors from the LLM call or from JSON parsing of its reply
            are not handled here and propagate to the caller.
    """
    start = time.perf_counter()

    # Language detection is best-effort: fall back to English on any failure
    # (for example an empty description), without swallowing
    # KeyboardInterrupt/SystemExit the way a bare ``except:`` would.
    try:
        lang = detect(description)
    except Exception:
        lang = "en"

    # Perform web search
    no_results_text = "No search results available."
    try:
        search_results, _ = search_with_retries(description, max_results=3)
    except RuntimeError:
        search_text = no_results_text
    else:
        # duckduckgo_search text results carry their summary under "body";
        # "snippet" is kept as a fallback for result dicts that use that key.
        snippets = [
            res.get("body") or res.get("snippet", "")
            for res in (search_results or [])
        ]
        search_text = "\n".join(s for s in snippets if s) or no_results_text

    # Format the prompt
    prompt_extract = PromptTemplate(
        template=template_extract_features,
        input_variables=["TEXT", "SEARCH_RESULTS"],
    )
    formatted_prompt = prompt_extract.format(
        TEXT=description, SEARCH_RESULTS=search_text
    )

    # LLM response
    response = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(response)

    return lang, parsed_output, time.perf_counter() - start
# Create the Gradio interface
def create_gradio_interface():
    """Build the Gradio Blocks UI for feature extraction and launch it.

    The UI has one input textbox for the item description, three output
    textboxes (detected language, extracted features, time taken) and a
    button that runs ``extract_features`` on the entered text.
    """
    with gr.Blocks() as iface:
        text_input = gr.Textbox(label="Item Description")
        lang_output = gr.Textbox(label="Detected Language")
        feature_output = gr.Textbox(label="Extracted Features (JSON)")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Extract Features")

        def on_submit(text):
            """Run the extraction and format the three output fields."""
            lang, features, duration = extract_features(text)
            # The parsed features are a Python object; serialise them so the
            # textbox labelled "JSON" shows real JSON rather than a Python
            # repr with single quotes.
            features_json = json.dumps(features, indent=2, ensure_ascii=False)
            return lang, features_json, f"{duration:.2f} seconds"

        submit_btn.click(
            fn=on_submit,
            inputs=text_input,
            outputs=[lang_output, feature_output, time_taken],
        )
    iface.launch()
# Build and launch the UI only when this file is executed as a script.
if __name__ == "__main__":
    create_gradio_interface()
|