from smolagents import Tool
from typing import Any, Optional


class SimpleTool(Tool):
    name = "analyze_content"
    description = "Enhanced web content analyzer with multiple analysis modes."
    inputs = {
        "input_text": {
            "type": "string",
            "description": "URL or direct text to analyze.",
        },
        "mode": {
            "type": "string",
            "nullable": True,
            "description": "Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').",
        },
    }
    output_type = "string"

    def forward(self, input_text: str, mode: str = "analyze") -> str:
        """Enhanced web content analyzer with multiple analysis modes.

        Args:
            input_text: URL or direct text to analyze.
            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').

        Returns:
            str: JSON-formatted analysis results
        """
        import json
        import re

        import requests
        from bs4 import BeautifulSoup
        from transformers import pipeline

        try:
            # Set up request headers
            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

            # Process input: fetch and parse a URL, or analyze raw text directly
            if input_text.startswith(("http://", "https://")):
                response = requests.get(input_text, headers=headers, timeout=10)
                soup = BeautifulSoup(response.text, "html.parser")

                # Strip non-content tags before extracting text
                for tag in soup(["script", "style", "meta"]):
                    tag.decompose()

                title = soup.title.string if soup.title else "No title found"
                content = soup.get_text()
            else:
                title = "Text Analysis"
                content = input_text

            # Normalize whitespace but keep newlines, so the paragraph count
            # below and the 'sentiment' mode can still split on line breaks
            clean_text = re.sub(r"[ \t]+", " ", content)
            clean_text = re.sub(r"\n\s*\n+", "\n", clean_text).strip()

            if len(clean_text) < 100:
                return json.dumps({
                    "status": "error",
                    "message": "Content too short for analysis (minimum 100 characters)",
                })

            # Initialize models
            summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            classifier = pipeline(
                "text-classification",
                model="nlptown/bert-base-multilingual-uncased-sentiment",
            )

            # Basic stats (reading time assumes ~200 words per minute)
            stats = {
                "title": title,
                "characters": len(clean_text),
                "words": len(clean_text.split()),
                "paragraphs": len([p for p in clean_text.split("\n") if p.strip()]),
                "reading_time": f"{len(clean_text.split()) // 200} minutes",
            }

            result = {"status": "success", "stats": stats}

            # The sentiment model emits labels '1 star' ... '5 stars'
            sentiment_labels = [
                "Very Negative", "Negative", "Neutral", "Positive", "Very Positive"
            ]

            # Mode-specific processing
            if mode == "analyze":
                # Summarize the leading text (input capped at 1024 characters)
                summary = summarizer(
                    clean_text[:1024], max_length=100, min_length=30
                )[0]["summary_text"]

                # Overall sentiment; the star count is the first label character
                sentiment = classifier(clean_text[:512])[0]
                score = int(sentiment["label"][0])

                result.update({
                    "summary": summary,
                    "sentiment": {
                        "overall": sentiment_labels[score - 1],
                        "score": score,
                        # Model probability for the predicted label
                        "confidence": f"{sentiment['score'] * 100:.1f}%",
                    },
                })

            elif mode == "sentiment":
                # Per-section sentiment on up to five substantial paragraphs
                paragraphs = [p for p in clean_text.split("\n") if len(p.strip()) > 50]
                sentiments = []
                for i, para in enumerate(paragraphs[:5]):
                    sent = classifier(para[:512])[0]
                    score = int(sent["label"][0])
                    sentiments.append({
                        "section": i + 1,
                        "text": para[:100] + "...",
                        "sentiment": sentiment_labels[score - 1],
                        "score": score,
                    })

                result.update({
                    "sentiment_analysis": {
                        "sections": sentiments,
                        "total_sections": len(sentiments),
                    }
                })

            elif mode == "summarize":
                # Summarize up to three 1024-character chunks
                chunks = [
                    clean_text[i:i + 1024]
                    for i in range(0, min(len(clean_text), 3072), 1024)
                ]
                summaries = []
                for chunk in chunks:
                    if len(chunk) > 100:
                        summary = summarizer(
                            chunk, max_length=100, min_length=30
                        )[0]["summary_text"]
                        summaries.append(summary)

                result.update({
                    "summaries": summaries,
                    "chunks_analyzed": len(summaries),
                })

            elif mode == "topics":
                # Basic keyword-based topic categorization; the text is
                # lowercased before matching, so 'ai' is written as a bounded
                # lowercase token to avoid matching inside words like 'said'
                categories = {
                    "Technology": r"tech|software|hardware|digital|computer|\bai\b|data",
                    "Business": r"business|market|finance|economy|industry",
                    "Science": r"science|research|study|discovery",
                    "Health": r"health|medical|medicine|wellness",
                    "General": r"news|world|people|life",
                }

                topic_scores = {}
                for topic, pattern in categories.items():
                    topic_scores[topic] = len(re.findall(pattern, clean_text.lower()))

                result.update({
                    "topic_analysis": {
                        "detected_topics": topic_scores,
                        "primary_topic": max(topic_scores.items(), key=lambda x: x[1])[0],
                    }
                })

            return json.dumps(result, indent=2)

        except requests.exceptions.RequestException as e:
            return json.dumps({
                "status": "error",
                "message": f"Failed to fetch content: {str(e)}",
                "type": "request_error",
            })
        except Exception as e:
            return json.dumps({
                "status": "error",
                "message": f"Analysis failed: {str(e)}",
                "type": "general_error",
            })