MHamdan commited on
Commit
5952adf
·
1 Parent(s): ec149b5

Initial commit with full functionality; extends the app, requirements, and tools

Browse files
Files changed (5) hide show
  1. Gradio_UI.py +129 -29
  2. app.py +52 -27
  3. prompts.yaml +27 -19
  4. requirements.txt +20 -6
  5. tools/final_answer.py +61 -8
Gradio_UI.py CHANGED
@@ -1,29 +1,132 @@
1
  # Gradio_UI.py
2
  import gradio as gr
3
  from smolagents import CodeAgent
4
- from typing import Optional
 
 
 
 
 
 
 
 
5
 
6
  class GradioUI:
7
  def __init__(self, agent: CodeAgent):
8
  self.agent = agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- def process_query(self, query: str) -> str:
 
 
 
 
 
 
 
 
 
11
  try:
12
- response = self.agent.run(query)
13
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  except Exception as e:
15
- return f"Error processing query: {str(e)}"
 
 
 
 
 
 
 
 
 
16
 
17
  def launch(self,
18
  server_name: Optional[str] = None,
19
  server_port: Optional[int] = None,
20
  share: bool = False):
 
21
 
22
  # Create the interface
23
- with gr.Blocks(title="Smart Web Analyzer Plus") as demo:
 
24
  gr.Markdown("# 🌐 Smart Web Analyzer Plus")
25
  gr.Markdown("Analyze web content using AI to extract summaries, determine sentiment, and identify topics.")
26
 
 
27
  with gr.Row():
28
  with gr.Column(scale=3):
29
  url_input = gr.Textbox(
@@ -44,20 +147,20 @@ class GradioUI:
44
  variant="primary"
45
  )
46
 
47
- # Output display
 
 
 
48
  with gr.Tabs() as tabs:
49
- with gr.Tab("📄 Clean Text"):
50
  clean_text_output = gr.Markdown()
51
- with gr.Tab("📝 Summary"):
52
  summary_output = gr.Markdown()
53
- with gr.Tab("🎭 Sentiment"):
54
  sentiment_output = gr.Markdown()
55
- with gr.Tab("📊 Topics"):
56
  topics_output = gr.Markdown()
57
 
58
- # Loading indicator
59
- status = gr.Markdown(visible=False)
60
-
61
  # Examples
62
  gr.Examples(
63
  examples=[
@@ -68,26 +171,23 @@ class GradioUI:
68
  label="Try these examples"
69
  )
70
 
71
- def create_analysis_prompt(url: str, types: list) -> str:
72
- type_str = ", ".join(types)
73
- return f"Analyze the content at {url} and provide {type_str} analysis."
74
-
75
- def on_analyze_start():
76
  return gr.update(value="⏳ Analysis in progress...", visible=True)
77
-
78
- def on_analyze_end():
79
  return gr.update(value="", visible=False)
80
-
81
- # Event handlers
82
  analyze_btn.click(
83
- fn=on_analyze_start,
84
- outputs=[status]
 
85
  ).then(
86
- fn=lambda url, types: self.process_query(create_analysis_prompt(url, types)),
87
  inputs=[url_input, analysis_types],
88
- outputs=[clean_text_output] # The agent will format the output appropriately
89
  ).then(
90
- fn=on_analyze_end,
91
  outputs=[status]
92
  )
93
 
@@ -96,4 +196,4 @@ class GradioUI:
96
  server_name=server_name,
97
  server_port=server_port,
98
  share=share
99
- )
 
1
  # Gradio_UI.py
2
  import gradio as gr
3
  from smolagents import CodeAgent
4
+ from typing import Optional, Dict, List, Tuple
5
+ import re
6
+ import logging
7
+ from functools import lru_cache
8
+ import json
9
+ from datetime import datetime
10
+ import time
11
+
12
+ logger = logging.getLogger(__name__)
13
 
14
  class GradioUI:
15
  def __init__(self, agent: CodeAgent):
16
  self.agent = agent
17
+ self.cache = {}
18
+ self.rate_limit = {}
19
+
20
+ def validate_url(self, url: str) -> bool:
21
+ """Validate URL format."""
22
+ url_pattern = re.compile(
23
+ r'^https?://'
24
+ r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|'
25
+ r'localhost|'
26
+ r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
27
+ r'(?::\d+)?'
28
+ r'(?:/?|[/?]\S+)$', re.IGNORECASE)
29
+ return bool(url_pattern.match(url))
30
+
31
+ def check_rate_limit(self, url: str) -> bool:
32
+ """Check if URL has been requested too frequently."""
33
+ current_time = time.time()
34
+ if url in self.rate_limit:
35
+ last_request = self.rate_limit[url]
36
+ if current_time - last_request < 60: # 1 minute cooldown
37
+ return False
38
+ self.rate_limit[url] = current_time
39
+ return True
40
+
41
+ @lru_cache(maxsize=100)
42
+ def get_cached_analysis(self, url: str, analysis_types: tuple) -> Optional[Dict]:
43
+ """Get cached analysis results if available."""
44
+ cache_key = f"{url}_{','.join(analysis_types)}"
45
+ return self.cache.get(cache_key)
46
 
47
+ def store_cache(self, url: str, analysis_types: List[str], results: Dict):
48
+ """Store analysis results in cache."""
49
+ cache_key = f"{url}_{','.join(analysis_types)}"
50
+ self.cache[cache_key] = {
51
+ 'results': results,
52
+ 'timestamp': datetime.now().isoformat()
53
+ }
54
+
55
+ def process_query(self, url: str, analysis_types: List[str]) -> Tuple[str, str, str, str]:
56
+ """Process the analysis query and return results for all output tabs."""
57
  try:
58
+ # Input validation
59
+ if not url:
60
+ raise ValueError("Please enter a URL")
61
+ if not self.validate_url(url):
62
+ raise ValueError("Invalid URL format")
63
+ if not self.check_rate_limit(url):
64
+ raise ValueError("Please wait before analyzing this URL again")
65
+
66
+ # Check cache
67
+ cached = self.get_cached_analysis(url, tuple(analysis_types))
68
+ if cached:
69
+ logger.info(f"Returning cached results for {url}")
70
+ results = cached['results']
71
+ return (
72
+ results.get('clean_text', ''),
73
+ results.get('summary', ''),
74
+ results.get('sentiment', ''),
75
+ results.get('topics', '')
76
+ )
77
+
78
+ # Create analysis prompt
79
+ prompt = self.create_analysis_prompt(url, analysis_types)
80
+
81
+ # Run analysis
82
+ response = self.agent.run(prompt)
83
+
84
+ # Parse response
85
+ try:
86
+ results = json.loads(response) if isinstance(response, str) else response
87
+ except json.JSONDecodeError:
88
+ results = {
89
+ 'clean_text': response,
90
+ 'summary': '',
91
+ 'sentiment': '',
92
+ 'topics': ''
93
+ }
94
+
95
+ # Cache results
96
+ self.store_cache(url, analysis_types, results)
97
+
98
+ return (
99
+ results.get('clean_text', ''),
100
+ results.get('summary', ''),
101
+ results.get('sentiment', ''),
102
+ results.get('topics', '')
103
+ )
104
+
105
  except Exception as e:
106
+ logger.error(f"Error processing query: {str(e)}")
107
+ error_msg = f"⚠️ Error: {str(e)}"
108
+ return error_msg, error_msg, error_msg, error_msg
109
+
110
+ def create_analysis_prompt(self, url: str, types: List[str]) -> str:
111
+ """Create the analysis prompt based on selected types."""
112
+ if not types:
113
+ types = ["summarize"] # Default analysis type
114
+ type_str = ", ".join(types)
115
+ return f"Analyze the content at {url} and provide the following analysis: {type_str}. Return results in JSON format with keys: clean_text, summary, sentiment, topics."
116
 
117
  def launch(self,
118
  server_name: Optional[str] = None,
119
  server_port: Optional[int] = None,
120
  share: bool = False):
121
+ """Launch the Gradio interface."""
122
 
123
  # Create the interface
124
+ with gr.Blocks(title="Smart Web Analyzer Plus", theme=gr.themes.Soft()) as demo:
125
+ # Header
126
  gr.Markdown("# 🌐 Smart Web Analyzer Plus")
127
  gr.Markdown("Analyze web content using AI to extract summaries, determine sentiment, and identify topics.")
128
 
129
+ # Input section
130
  with gr.Row():
131
  with gr.Column(scale=3):
132
  url_input = gr.Textbox(
 
147
  variant="primary"
148
  )
149
 
150
+ # Status indicator
151
+ status = gr.Markdown(visible=False)
152
+
153
+ # Output tabs
154
  with gr.Tabs() as tabs:
155
+ with gr.TabItem("📄 Clean Text"):
156
  clean_text_output = gr.Markdown()
157
+ with gr.TabItem("📝 Summary"):
158
  summary_output = gr.Markdown()
159
+ with gr.TabItem("🎭 Sentiment"):
160
  sentiment_output = gr.Markdown()
161
+ with gr.TabItem("📊 Topics"):
162
  topics_output = gr.Markdown()
163
 
 
 
 
164
  # Examples
165
  gr.Examples(
166
  examples=[
 
171
  label="Try these examples"
172
  )
173
 
174
+ # Event handlers
175
+ def on_analyze_click():
 
 
 
176
  return gr.update(value="⏳ Analysis in progress...", visible=True)
177
+
178
+ def on_analyze_complete():
179
  return gr.update(value="", visible=False)
180
+
 
181
  analyze_btn.click(
182
+ fn=on_analyze_click,
183
+ outputs=[status],
184
+ queue=False
185
  ).then(
186
+ fn=self.process_query,
187
  inputs=[url_input, analysis_types],
188
+ outputs=[clean_text_output, summary_output, sentiment_output, topics_output]
189
  ).then(
190
+ fn=on_analyze_complete,
191
  outputs=[status]
192
  )
193
 
 
196
  server_name=server_name,
197
  server_port=server_port,
198
  share=share
199
+ )
app.py CHANGED
@@ -3,34 +3,59 @@ from smolagents import CodeAgent, HfApiModel
3
  from tools.final_answer import FinalAnswerTool
4
  import yaml
5
  from Gradio_UI import GradioUI
 
 
 
 
 
 
 
 
6
 
7
  def create_agent():
8
- # Initialize the final answer tool
9
- final_answer = FinalAnswerTool()
10
- # Load prompt templates
11
- with open("prompts.yaml", 'r') as stream:
12
- prompt_templates = yaml.safe_load(stream)
13
-
14
- # Create the model
15
- model = HfApiModel(
16
- model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
17
- max_tokens=2096,
18
- temperature=0.5
19
- )
20
-
21
- # Create and return the agent
22
- return CodeAgent(
23
- model=model,
24
- tools=[final_answer],
25
- max_steps=6,
26
- verbosity_level=1,
27
- prompt_templates=prompt_templates
28
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  if __name__ == "__main__":
31
- # Create and launch the agent
32
- agent = create_agent()
33
- GradioUI(agent).launch(
34
- server_name="0.0.0.0",
35
- server_port=7860
36
- )
 
3
  from tools.final_answer import FinalAnswerTool
4
  import yaml
5
  from Gradio_UI import GradioUI
6
+ import logging
7
+
8
+ # Configure logging
9
+ logging.basicConfig(
10
+ level=logging.INFO,
11
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
12
+ )
13
+ logger = logging.getLogger(__name__)
14
 
15
  def create_agent():
16
+ try:
17
+ # Initialize the final answer tool
18
+ final_answer = FinalAnswerTool()
19
+
20
+ # Load prompt templates
21
+ with open("prompts.yaml", 'r', encoding='utf-8') as stream:
22
+ prompt_templates = yaml.safe_load(stream)
23
+
24
+ # Create the model
25
+ model = HfApiModel(
26
+ model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
27
+ max_tokens=2096,
28
+ temperature=0.5
29
+ )
30
+
31
+ # Create and return the agent
32
+ return CodeAgent(
33
+ model=model,
34
+ tools=[final_answer],
35
+ max_steps=6,
36
+ verbosity_level=1,
37
+ prompt_templates=prompt_templates
38
+ )
39
+ except Exception as e:
40
+ logger.error(f"Error creating agent: {str(e)}")
41
+ raise
42
+
43
+ def main():
44
+ try:
45
+ # Create and launch the agent
46
+ logger.info("Initializing Smart Web Analyzer Plus...")
47
+ agent = create_agent()
48
+
49
+ # Initialize and launch UI
50
+ ui = GradioUI(agent)
51
+ ui.launch(
52
+ server_name="0.0.0.0",
53
+ server_port=7860,
54
+ share=False # Set to True if you want to share publicly
55
+ )
56
+ except Exception as e:
57
+ logger.error(f"Application startup failed: {str(e)}")
58
+ raise
59
 
60
  if __name__ == "__main__":
61
+ main()
 
 
 
 
 
prompts.yaml CHANGED
@@ -1,42 +1,50 @@
1
  # prompts.yaml
2
  system_prompt: |
3
- You are a helpful AI assistant that can analyze web content. You have access to several tools:
4
- - Text extraction to get content from URLs
5
- - Sentiment analysis to determine the emotional tone
6
- - Summarization to create concise summaries
7
- - Topic detection to identify main themes
8
- - Web search for additional context
9
- - Time zone checking for temporal context
10
 
11
- Always think step by step and use the most appropriate tools for the task.
 
 
 
 
 
 
12
 
13
  user: |
14
  User query: {input}
15
 
16
- Think through this step-by-step:
17
- 1. Understand what analysis is being requested
18
- 2. Plan which tools to use
19
- 3. Execute the analysis in a logical order
20
- 4. Provide a clear, formatted response
21
 
22
  Available tools: {tools}
23
 
24
- assistant: |
25
- I'll help analyze that content. Let me think about this step by step:
26
 
27
  {thoughts}
28
 
29
- Let me take action using the appropriate tools.
30
 
31
  observation: |
32
  Tool response: {output}
33
 
34
  final: |
35
- Based on the analysis, here is the final answer:
36
 
37
  {response}
38
 
39
  error: |
40
- I encountered an error: {error}
 
 
41
 
42
- Let me try a different approach or tool to help you.
 
1
  # prompts.yaml
2
  system_prompt: |
3
+ You are a sophisticated AI assistant specialized in web content analysis. You have access to these capabilities:
4
+ - Text extraction: Clean and extract meaningful content from URLs
5
+ - Sentiment analysis: Determine emotional tone and context
6
+ - Summarization: Create concise, informative summaries
7
+ - Topic detection: Identify main themes and subjects
8
+ - Web search: Gather additional context when needed
9
+ - Temporal analysis: Consider time-based context
10
 
11
+ Always structure your responses in JSON format with these keys:
12
+ - clean_text: The extracted and cleaned content
13
+ - summary: A concise summary if requested
14
+ - sentiment: Sentiment analysis if requested
15
+ - topics: Main topics if requested
16
+
17
+ Think step by step and use the most appropriate tools for each task.
18
 
19
  user: |
20
  User query: {input}
21
 
22
+ Analysis process:
23
+ 1. Validate and process the URL
24
+ 2. Determine required analysis types
25
+ 3. Execute analysis in order: extraction → summary → sentiment → topics
26
+ 4. Format results in JSON structure
27
 
28
  Available tools: {tools}
29
 
30
+ A: |
31
+ I'll analyze the content systematically:
32
 
33
  {thoughts}
34
 
35
+ Executing analysis with appropriate tools...
36
 
37
  observation: |
38
  Tool response: {output}
39
 
40
  final: |
41
+ Analysis complete. Results formatted in JSON:
42
 
43
  {response}
44
 
45
  error: |
46
+ An error occurred: {error}
47
+
48
+ Technical details: {error_details}
49
 
50
+ I'll attempt an alternative approach or provide partial results if possible.
requirements.txt CHANGED
@@ -1,6 +1,20 @@
1
- smolagents
2
- gradio
3
- requests
4
- pytz
5
- pyyaml
6
- duckduckgo-search
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ smolagents>=0.2.0
3
+ gradio>=4.0.0
4
+ requests>=2.31.0
5
+ pytz>=2023.3
6
+ pyyaml>=6.0.1
7
+ python-dotenv>=1.0.0
8
+ beautifulsoup4>=4.12.2
9
+ numpy>=1.24.3
10
+ pandas>=2.0.3
11
+ scikit-learn>=1.3.0
12
+ nltk>=3.8.1
13
+ spacy>=3.7.2
14
+ transformers>=4.35.0
15
+ torch>=2.0.1
16
+ tqdm>=4.65.0
17
+ validators>=0.22.0
18
+ aiohttp>=3.8.5
19
+ cachetools>=5.3.1
20
+ python-dateutil>=2.8.2
tools/final_answer.py CHANGED
@@ -1,38 +1,91 @@
1
  # tools/final_answer.py
2
  from smolagents import Tool
3
  from typing import Optional, Dict, Any
 
 
 
 
4
 
5
  class FinalAnswerTool(Tool):
6
- """Tool for providing final answers to user queries."""
7
 
8
  name = "final_answer"
9
- description = "Tool for providing the final answer to the agent's task"
10
 
11
- # Define inputs with the correct type string
12
  inputs: Dict[str, Any] = {
13
  "answer": {
14
  "type": "string",
15
- "description": "The final answer to be returned"
16
  }
17
  }
18
 
19
- # Specify output type with correct string
20
  output_type = "string"
21
 
22
  def __init__(self, description: Optional[str] = None):
23
  super().__init__()
24
  self.description = description or self.description
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def forward(self, answer: str) -> str:
27
- """Process and return the final answer.
28
 
29
  Args:
30
  answer: The answer text to be returned
31
 
32
  Returns:
33
- str: The processed answer
34
  """
35
- return answer
 
 
 
 
 
 
 
 
 
 
36
 
37
  def __call__(self, answer: str) -> str:
38
  """Alias for forward method to maintain compatibility"""
 
1
  # tools/final_answer.py
2
  from smolagents import Tool
3
  from typing import Optional, Dict, Any
4
+ import json
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
 
9
  class FinalAnswerTool(Tool):
10
+ """Tool for providing final answers to user queries with improved error handling and validation."""
11
 
12
  name = "final_answer"
13
+ description = "Tool for providing the final answer to the agent's task with structured output"
14
 
 
15
  inputs: Dict[str, Any] = {
16
  "answer": {
17
  "type": "string",
18
+ "description": "The final answer to be returned in JSON format"
19
  }
20
  }
21
 
 
22
  output_type = "string"
23
 
24
  def __init__(self, description: Optional[str] = None):
25
  super().__init__()
26
  self.description = description or self.description
27
 
28
+ def validate_json(self, answer: str) -> bool:
29
+ """Validate if the answer is proper JSON format."""
30
+ try:
31
+ if isinstance(answer, str):
32
+ json.loads(answer)
33
+ return True
34
+ except json.JSONDecodeError:
35
+ return False
36
+
37
+ def format_response(self, answer: str) -> str:
38
+ """Format the response to ensure consistent structure."""
39
+ try:
40
+ if isinstance(answer, str):
41
+ # Try to parse as JSON first
42
+ if self.validate_json(answer):
43
+ return answer
44
+
45
+ # If not JSON, create a structured response
46
+ return json.dumps({
47
+ 'clean_text': answer,
48
+ 'summary': '',
49
+ 'sentiment': '',
50
+ 'topics': ''
51
+ })
52
+
53
+ # If answer is already a dict, convert to JSON
54
+ if isinstance(answer, dict):
55
+ return json.dumps(answer)
56
+
57
+ raise ValueError("Invalid answer format")
58
+
59
+ except Exception as e:
60
+ logger.error(f"Error formatting response: {str(e)}")
61
+ return json.dumps({
62
+ 'error': str(e),
63
+ 'clean_text': str(answer),
64
+ 'summary': '',
65
+ 'sentiment': '',
66
+ 'topics': ''
67
+ })
68
+
69
  def forward(self, answer: str) -> str:
70
+ """Process and return the final answer with improved error handling.
71
 
72
  Args:
73
  answer: The answer text to be returned
74
 
75
  Returns:
76
+ str: The processed answer in JSON format
77
  """
78
+ try:
79
+ return self.format_response(answer)
80
+ except Exception as e:
81
+ logger.error(f"Error in forward method: {str(e)}")
82
+ return json.dumps({
83
+ 'error': str(e),
84
+ 'clean_text': 'An error occurred while processing the response',
85
+ 'summary': '',
86
+ 'sentiment': '',
87
+ 'topics': ''
88
+ })
89
 
90
  def __call__(self, answer: str) -> str:
91
  """Alias for forward method to maintain compatibility"""