# Spaces: Sleeping  (web-page residue captured with the file; not part of the module)
# Gradio_UI.py
import json
import logging
import re
import time
from datetime import datetime
from functools import lru_cache
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
from smolagents import CodeAgent
logger = logging.getLogger(__name__)


class GradioUI:
    """Gradio front end that asks an agent to analyze the content of a URL.

    The UI collects a URL and a set of analysis types, builds a prompt, passes
    it to ``agent.run`` and shows the four result fields (clean text, summary,
    sentiment, topics) in separate tabs.  Results are cached in memory per
    URL/analysis-type combination, and uncached requests for the same URL are
    limited to one per ``RATE_LIMIT_SECONDS``.
    """

    # Minimum number of seconds between two agent runs for the same URL.
    RATE_LIMIT_SECONDS = 60

    # Analysis requested when the user ticks no box at all.
    DEFAULT_ANALYSIS_TYPES = ("summarize",)

    # Result fields, in the order of the four output tabs.
    RESULT_KEYS = ("clean_text", "summary", "sentiment", "topics")

    # Compiled once; applied with ``fullmatch`` so no anchors are needed.
    # The TLD may be up to 63 letters so long TLDs such as ".photography"
    # are accepted.
    _URL_PATTERN = re.compile(
        r'https?://'
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'
        r'localhost|'
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
        r'(?::\d+)?'
        r'(?:/?|[/?]\S+)',
        re.IGNORECASE,
    )

    def __init__(self, agent: "CodeAgent"):
        """Store the agent and create the empty cache and rate-limit tables.

        Args:
            agent: Object whose ``run(prompt)`` method performs the analysis.
        """
        self.agent = agent
        # cache key -> {'results': {...}, 'timestamp': ISO-8601 string}
        self.cache: Dict[str, Dict[str, Any]] = {}
        # URL -> time.time() of the last accepted (uncached) request
        self.rate_limit: Dict[str, float] = {}

    def validate_url(self, url: str) -> bool:
        """Return True if *url* is an http(s) URL with a host this UI accepts.

        Accepted hosts are dotted domain names, ``localhost`` and dotted-quad
        IPv4 literals, optionally followed by a port and a path/query.
        Surrounding whitespace (including a trailing newline) is not accepted.
        """
        if not isinstance(url, str):
            return False
        return self._URL_PATTERN.fullmatch(url) is not None

    def check_rate_limit(self, url: str) -> bool:
        """Record a request for *url* unless it is still cooling down.

        Returns:
            True if the request is allowed (and its time has been recorded),
            False if *url* was accepted less than ``RATE_LIMIT_SECONDS`` ago.
        """
        now = time.time()
        cooldown = self.RATE_LIMIT_SECONDS
        # Forget entries whose cooldown is over so the table cannot grow
        # without bound as new URLs keep arriving.
        expired = [
            seen_url
            for seen_url, seen_at in self.rate_limit.items()
            if now - seen_at >= cooldown
        ]
        for seen_url in expired:
            del self.rate_limit[seen_url]

        if url in self.rate_limit:
            return False
        self.rate_limit[url] = now
        return True

    @classmethod
    def _cache_key(cls, url: str, analysis_types) -> str:
        """Build the cache key shared by ``get_cached_analysis`` and ``store_cache``.

        The types are de-duplicated and sorted so the same selection made in a
        different order maps to the same entry; an empty selection uses the
        same default the prompt uses.
        """
        selected = analysis_types or cls.DEFAULT_ANALYSIS_TYPES
        return f"{url}_{','.join(sorted(set(selected)))}"

    def get_cached_analysis(self, url: str, analysis_types: tuple) -> Optional[Dict]:
        """Return the cache entry for *url*/*analysis_types*, or None.

        The entry is the dict written by ``store_cache``:
        ``{'results': ..., 'timestamp': ...}``.
        """
        return self.cache.get(self._cache_key(url, analysis_types))

    def store_cache(self, url: str, analysis_types: List[str], results: Dict):
        """Store *results* for *url*/*analysis_types* with the current timestamp."""
        self.cache[self._cache_key(url, analysis_types)] = {
            'results': results,
            'timestamp': datetime.now().isoformat(),
        }

    @staticmethod
    def _to_text(value: Any) -> str:
        """Convert one result field to the text shown in an output tab."""
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            # Render sequences (e.g. a list of topics) as a bullet list.
            return "\n".join(f"- {GradioUI._to_text(item)}" for item in value)
        if isinstance(value, dict):
            return json.dumps(value, ensure_ascii=False, indent=2, default=str)
        return str(value)

    @classmethod
    def _parse_response(cls, response: Any) -> Dict[str, str]:
        """Normalize the agent's answer to a dict with the four result keys.

        A string is parsed as JSON (an enclosing Markdown code fence is
        removed first).  Anything that does not end up as a JSON object is
        shown verbatim as the clean text with the other fields empty.
        """
        parsed = response
        if isinstance(response, str):
            text = response.strip()
            fenced = re.fullmatch(
                r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL | re.IGNORECASE
            )
            if fenced:
                text = fenced.group(1)
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError:
                parsed = None

        if not isinstance(parsed, dict):
            fallback = dict.fromkeys(cls.RESULT_KEYS, '')
            fallback['clean_text'] = cls._to_text(response)
            return fallback
        return {key: cls._to_text(parsed.get(key, '')) for key in cls.RESULT_KEYS}

    def _as_outputs(self, results: Dict) -> Tuple[str, str, str, str]:
        """Return the four tab values, in tab order, from a results dict."""
        return tuple(self._to_text(results.get(key, '')) for key in self.RESULT_KEYS)

    def process_query(self, url: str, analysis_types: List[str]) -> Tuple[str, str, str, str]:
        """Run (or fetch from cache) the analysis and return the four tab values.

        Args:
            url: Address entered by the user; surrounding whitespace is ignored.
            analysis_types: Selected analysis types (may be empty).

        Returns:
            ``(clean_text, summary, sentiment, topics)``.  This method never
            raises: on any failure every element is the same error message.
        """
        try:
            # Input validation
            url = (url or "").strip()
            if not url:
                raise ValueError("Please enter a URL")
            if not self.validate_url(url):
                raise ValueError("Invalid URL format")
            analysis_types = list(analysis_types or [])

            # Serve cached results first: they cost nothing, so they must not
            # be blocked by (or consume) the per-URL cooldown.
            cached = self.get_cached_analysis(url, tuple(analysis_types))
            if cached is not None:
                logger.info("Returning cached results for %s", url)
                return self._as_outputs(cached['results'])

            if not self.check_rate_limit(url):
                raise ValueError("Please wait before analyzing this URL again")

            prompt = self.create_analysis_prompt(url, analysis_types)
            response = self.agent.run(prompt)
            results = self._parse_response(response)

            self.store_cache(url, analysis_types, results)
            return self._as_outputs(results)
        except Exception as exc:
            # Validation problems are expected user errors; anything else gets
            # a traceback in the log.
            logger.error(
                "Error processing query: %s",
                exc,
                exc_info=not isinstance(exc, ValueError),
            )
            # "\u26a0\ufe0f" is the warning-sign emoji.
            error_msg = f"\u26a0\ufe0f Error: {exc}"
            return error_msg, error_msg, error_msg, error_msg

    def create_analysis_prompt(self, url: str, types: List[str]) -> str:
        """Build the prompt sent to the agent.

        Args:
            url: Address of the page to analyze.
            types: Requested analysis types, kept in the given order; when
                empty, ``DEFAULT_ANALYSIS_TYPES`` is used.
        """
        if not types:
            types = list(self.DEFAULT_ANALYSIS_TYPES)
        type_str = ", ".join(types)
        return f"Analyze the content at {url} and provide the following analysis: {type_str}. Return results in JSON format with keys: clean_text, summary, sentiment, topics."

    def launch(self,
               server_name: Optional[str] = None,
               server_port: Optional[int] = None,
               share: bool = False):
        """Build the Gradio interface and launch it.

        Args:
            server_name: Forwarded to ``demo.launch``.
            server_port: Forwarded to ``demo.launch``.
            share: Forwarded to ``demo.launch``.
        """
        with gr.Blocks(title="Smart Web Analyzer Plus", theme=gr.themes.Soft()) as demo:
            # Header
            gr.Markdown("# Smart Web Analyzer Plus")
            gr.Markdown("Analyze web content using AI to extract summaries, determine sentiment, and identify topics.")

            # Input section
            with gr.Row():
                with gr.Column(scale=3):
                    url_input = gr.Textbox(
                        label="Enter URL",
                        placeholder="https://example.com",
                        show_label=True,
                    )
                with gr.Column(scale=2):
                    analysis_types = gr.CheckboxGroup(
                        choices=["summarize", "sentiment", "topics"],
                        label="Analysis Types",
                        value=["summarize"],
                        show_label=True,
                    )
                with gr.Column(scale=1):
                    analyze_btn = gr.Button(
                        "Analyze",
                        variant="primary",
                    )

            # Status indicator, shown only while an analysis is running
            status = gr.Markdown(visible=False)

            # Output tabs, in the same order as RESULT_KEYS
            with gr.Tabs():
                with gr.TabItem("Clean Text"):
                    clean_text_output = gr.Markdown()
                with gr.TabItem("Summary"):
                    summary_output = gr.Markdown()
                with gr.TabItem("Sentiment"):
                    sentiment_output = gr.Markdown()
                with gr.TabItem("Topics"):
                    topics_output = gr.Markdown()

            # Examples
            gr.Examples(
                examples=[
                    ["https://www.bbc.com/news/technology-67881954", ["summarize", "sentiment"]],
                    ["https://arxiv.org/html/2312.17296v1", ["topics", "summarize"]],
                ],
                inputs=[url_input, analysis_types],
                label="Try these examples",
            )

            # Event handlers
            def on_analyze_click():
                # "\u23f3" is the hourglass emoji.
                return gr.update(value="\u23f3 Analysis in progress...", visible=True)

            def on_analyze_complete():
                return gr.update(value="", visible=False)

            # Show the status, run the analysis, then hide the status again.
            # process_query never raises, so the final step always runs.
            analyze_btn.click(
                fn=on_analyze_click,
                outputs=[status],
                queue=False,
            ).then(
                fn=self.process_query,
                inputs=[url_input, analysis_types],
                outputs=[clean_text_output, summary_output, sentiment_output, topics_output],
            ).then(
                fn=on_analyze_complete,
                outputs=[status],
            )

        # Launch the interface
        demo.launch(
            server_name=server_name,
            server_port=server_port,
            share=share,
        )