# Source: Hugging Face Space "MHamdan/SmartWebAnalyzerPlus" (status: sleeping).
# File size: 7,504 bytes.

# Gradio_UI.py
import gradio as gr
from smolagents import CodeAgent
from typing import Optional, Dict, List, Tuple
import re
import logging
from functools import lru_cache
import json
from datetime import datetime
import time

# Module-level logger named after this module (``__name__``); no handlers or
# levels are configured in this file.
logger = logging.getLogger(__name__)

class GradioUI:
    """Gradio front end for analysing a web page with a ``CodeAgent``.

    The UI collects a URL and a set of analysis types, asks the agent to
    analyse the page, and shows the reply in four tabs (clean text, summary,
    sentiment, topics).  Successful results are cached per URL/analysis-type
    combination, and agent runs for the same URL are rate limited.

    Attributes:
        agent: Object exposing ``run(prompt)``.  Its reply is expected to be
            a JSON string or a dict with the keys listed in ``RESULT_KEYS``.
        cache: Maps cache keys to ``{'results': dict, 'timestamp': str}``.
        rate_limit: Maps a URL to the ``time.monotonic()`` reading taken when
            its most recent agent run was accepted.
    """

    # Minimum number of seconds between two agent runs for the same URL.
    RATE_LIMIT_SECONDS = 60

    # Upper bound on stored results; the oldest entry is evicted first.
    MAX_CACHE_ENTRIES = 100

    # Analysis performed when the user ticks no checkbox.
    DEFAULT_ANALYSIS_TYPES = ("summarize",)

    # Keys requested from the agent, in output-tab order.
    RESULT_KEYS = ("clean_text", "summary", "sentiment", "topics")

    # Compiled once at class creation instead of on every validation call.
    # Used with ``fullmatch`` so no ``^``/``$`` anchors are needed (``$`` would
    # also accept a trailing newline).
    # NOTE(review): ``localhost`` and raw IP addresses are accepted, so the
    # agent can be pointed at internal hosts (SSRF) - confirm this is intended
    # before exposing the app publicly.
    _URL_PATTERN = re.compile(
        r'https?://'
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'
        r'localhost|'
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
        r'(?::\d+)?'
        r'(?:/?|[/?]\S+)', re.IGNORECASE)

    # Matches a reply that consists solely of one Markdown code fence.
    _FENCE_PATTERN = re.compile(r'```(?:json)?\s*(.*?)\s*```',
                                re.DOTALL | re.IGNORECASE)

    def __init__(self, agent: "CodeAgent"):
        """Store the agent and start with an empty cache and rate-limit table."""
        self.agent = agent
        self.cache = {}
        self.rate_limit = {}

    def validate_url(self, url: str) -> bool:
        """Return True if *url* is a well-formed ``http(s)`` URL.

        Accepts domain names (TLD of 2-63 letters), ``localhost`` and dotted
        IPv4 literals, with an optional port and path/query.  Non-string
        input is reported as invalid rather than raising.
        """
        if not isinstance(url, str):
            return False
        return self._URL_PATTERN.fullmatch(url) is not None

    def check_rate_limit(self, url: str) -> bool:
        """Return True and record the request if *url* may be analysed now.

        Returns False when the same URL was accepted less than
        ``RATE_LIMIT_SECONDS`` ago.
        """
        # A monotonic clock is immune to wall-clock adjustments.
        now = time.monotonic()
        cooldown = self.RATE_LIMIT_SECONDS

        # Forget expired entries so the table cannot grow without bound.
        expired = [seen_url for seen_url, stamp in self.rate_limit.items()
                   if now - stamp >= cooldown]
        for seen_url in expired:
            del self.rate_limit[seen_url]

        if url in self.rate_limit:
            return False
        self.rate_limit[url] = now
        return True

    def _cache_key(self, url: str, analysis_types) -> str:
        """Build the cache key for *url* and *analysis_types*.

        Types are de-duplicated and sorted, and an empty selection maps to
        the default analysis, so equivalent requests share one entry.
        """
        normalized = sorted(set(analysis_types or ()))
        if not normalized:
            normalized = list(self.DEFAULT_ANALYSIS_TYPES)
        return f"{url}_{','.join(normalized)}"

    def get_cached_analysis(self, url: str, analysis_types: tuple) -> Optional[Dict]:
        """Return the cache entry for the request, or None on a miss.

        Deliberately not wrapped in ``lru_cache``: memoizing this lookup
        would freeze the first miss (``None``) and hide later stores.
        """
        return self.cache.get(self._cache_key(url, analysis_types))

    def store_cache(self, url: str, analysis_types: List[str], results: Dict):
        """Store *results* for the request, evicting the oldest entries if full."""
        cache_key = self._cache_key(url, analysis_types)
        # Remove first so a refreshed entry is re-inserted as the newest one.
        self.cache.pop(cache_key, None)
        while self.cache and len(self.cache) >= self.MAX_CACHE_ENTRIES:
            # Dicts preserve insertion order, so the first key is the oldest.
            del self.cache[next(iter(self.cache))]
        self.cache[cache_key] = {
            'results': results,
            'timestamp': datetime.now().isoformat()
        }

    def _parse_response(self, response) -> Dict:
        """Turn the agent's reply into a results dict.

        A dict is used as is.  A string is parsed as JSON (a surrounding
        Markdown code fence is stripped first).  Anything that does not
        yield a dict is shown verbatim as the clean text.
        """
        parsed = response
        if isinstance(response, str):
            text = response.strip()
            fenced = self._FENCE_PATTERN.fullmatch(text)
            if fenced:
                text = fenced.group(1)
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError:
                parsed = None

        if isinstance(parsed, dict):
            return parsed

        if response is None:
            fallback_text = ''
        elif isinstance(response, str):
            fallback_text = response
        else:
            fallback_text = str(response)
        return {
            'clean_text': fallback_text,
            'summary': '',
            'sentiment': '',
            'topics': ''
        }

    @staticmethod
    def _to_text(value) -> str:
        """Render one result value as Markdown text."""
        if value is None:
            return ''
        if isinstance(value, str):
            return value
        if isinstance(value, dict):
            return "\n".join(f"- **{key}**: {item}" for key, item in value.items())
        if isinstance(value, (list, tuple, set)):
            return "\n".join(f"- {item}" for item in value)
        return str(value)

    def _format_results(self, results: Dict) -> Tuple[str, str, str, str]:
        """Project *results* onto the four output tabs, always as strings."""
        return tuple(self._to_text(results.get(key)) for key in self.RESULT_KEYS)

    def process_query(self, url: str, analysis_types: List[str]) -> Tuple[str, str, str, str]:
        """Analyse *url* and return the text for the four output tabs.

        Returns:
            ``(clean_text, summary, sentiment, topics)``.  On any failure all
            four entries carry the same ``"⚠️ Error: ..."`` message; this
            method never raises, because it is a UI event handler.
        """
        try:
            # Input validation
            if isinstance(url, str):
                url = url.strip()
            if not url:
                raise ValueError("Please enter a URL")
            if not self.validate_url(url):
                raise ValueError("Invalid URL format")
            analysis_types = list(analysis_types or [])

            # Serve from cache before rate limiting: a cached answer costs
            # nothing, so it must not be refused or consume the cooldown.
            cached = self.get_cached_analysis(url, tuple(analysis_types))
            if cached:
                logger.info("Returning cached results for %s", url)
                return self._format_results(cached['results'])

            if not self.check_rate_limit(url):
                raise ValueError("Please wait before analyzing this URL again")

            prompt = self.create_analysis_prompt(url, analysis_types)
            response = self.agent.run(prompt)
            results = self._parse_response(response)

            self.store_cache(url, analysis_types, results)
            return self._format_results(results)

        except ValueError as e:
            # Expected rejections (bad input, cooldown): no traceback needed.
            logger.warning("Query rejected: %s", e)
            error_msg = f"⚠️ Error: {str(e)}"
        except Exception as e:
            logger.exception("Error processing query: %s", e)
            error_msg = f"⚠️ Error: {str(e)}"
        return error_msg, error_msg, error_msg, error_msg

    def create_analysis_prompt(self, url: str, types: List[str]) -> str:
        """Create the agent prompt for *url*; defaults to a summary if no type is given."""
        selected = types if types else list(self.DEFAULT_ANALYSIS_TYPES)
        type_str = ", ".join(selected)
        return f"Analyze the content at {url} and provide the following analysis: {type_str}. Return results in JSON format with keys: clean_text, summary, sentiment, topics."

    def launch(self,
               server_name: Optional[str] = None,
               server_port: Optional[int] = None,
               share: bool = False):
        """Build the Gradio interface and launch it.

        Args:
            server_name: Passed through to ``demo.launch``.
            server_port: Passed through to ``demo.launch``.
            share: Passed through to ``demo.launch``.
        """
        with gr.Blocks(title="Smart Web Analyzer Plus", theme=gr.themes.Soft()) as demo:
            # Header
            gr.Markdown("# 🌐 Smart Web Analyzer Plus")
            gr.Markdown("Analyze web content using AI to extract summaries, determine sentiment, and identify topics.")

            # Input section
            with gr.Row():
                with gr.Column(scale=3):
                    url_input = gr.Textbox(
                        label="Enter URL",
                        placeholder="https://example.com",
                        show_label=True
                    )
                with gr.Column(scale=2):
                    analysis_types = gr.CheckboxGroup(
                        choices=["summarize", "sentiment", "topics"],
                        label="Analysis Types",
                        value=["summarize"],
                        show_label=True
                    )
                with gr.Column(scale=1):
                    analyze_btn = gr.Button(
                        "Analyze",
                        variant="primary"
                    )

            # Status indicator, hidden until an analysis is running
            status = gr.Markdown(visible=False)

            # Output tabs, one per key in RESULT_KEYS
            with gr.Tabs():
                with gr.TabItem("📄 Clean Text"):
                    clean_text_output = gr.Markdown()
                with gr.TabItem("📝 Summary"):
                    summary_output = gr.Markdown()
                with gr.TabItem("🎭 Sentiment"):
                    sentiment_output = gr.Markdown()
                with gr.TabItem("📊 Topics"):
                    topics_output = gr.Markdown()

            # Examples
            gr.Examples(
                examples=[
                    ["https://www.bbc.com/news/technology-67881954", ["summarize", "sentiment"]],
                    ["https://arxiv.org/html/2312.17296v1", ["topics", "summarize"]]
                ],
                inputs=[url_input, analysis_types],
                label="Try these examples"
            )

            # Event handlers
            def on_analyze_click():
                return gr.update(value="⏳ Analysis in progress...", visible=True)

            def on_analyze_complete():
                return gr.update(value="", visible=False)

            # Show the status line, run the analysis, then hide the status
            # line again.
            analyze_btn.click(
                fn=on_analyze_click,
                outputs=[status],
                queue=False
            ).then(
                fn=self.process_query,
                inputs=[url_input, analysis_types],
                outputs=[clean_text_output, summary_output, sentiment_output, topics_output]
            ).then(
                fn=on_analyze_complete,
                outputs=[status]
            )

        # Launch the interface
        demo.launch(
            server_name=server_name,
            server_port=server_port,
            share=share
        )