Alaaeldin committed on
Commit
13b6e34
·
verified ·
1 Parent(s): 5ee2dd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -56
app.py CHANGED
@@ -2,8 +2,11 @@ from smolagents import CodeAgent, tool
2
  import datetime
3
  import pytz
4
  import yaml
5
- import requests
6
- import json
 
 
 
7
  from tools.final_answer import FinalAnswerTool
8
  from Gradio_UI import GradioUI
9
 
@@ -65,67 +68,196 @@ def get_current_time_in_timezone(timezone: str) -> str:
65
  except Exception as e:
66
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
67
 
68
- # Weather Forecast Tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  @tool
70
def weather_forecast(location: str) -> str:
    """Fetches weather forecast for a specified location.

    Queries the public wttr.in JSON endpoint (``?format=j1``) and renders the
    current conditions plus up to three forecast days as plain text. Every
    failure is reported as a returned message rather than a raised exception.

    Args:
        location: The location to get weather forecast for (city name or coordinates).

    Returns:
        A multi-line weather report, or a string starting with
        "Error fetching weather for ..." when the lookup fails.
    """
    # Imported locally so this block does not depend on the file's import list.
    from urllib.parse import quote

    try:
        # Percent-encode the user-supplied location so spaces, '?', '#' or '/'
        # cannot alter the request path or query. ',', '+', '~' and '@' are
        # kept because wttr.in uses them in its location syntax
        # (NOTE(review): confirm against the wttr.in documentation).
        encoded_location = quote(location.strip(), safe=",+~@")
        api_url = f"https://wttr.in/{encoded_location}?format=j1"

        response = requests.get(api_url, timeout=10)
        response.raise_for_status()  # Raise an exception for HTTP errors
        weather_data = response.json()

        # `or [{}]` also covers a present-but-empty list, which would
        # otherwise raise IndexError on `[0]`.
        current_condition = (weather_data.get("current_condition") or [{}])[0]
        weather_desc = (current_condition.get("weatherDesc") or [{}])[0].get("value", "Unknown")
        temp_c = current_condition.get("temp_C", "Unknown")
        temp_f = current_condition.get("temp_F", "Unknown")
        feels_like_c = current_condition.get("FeelsLikeC", "Unknown")
        humidity = current_condition.get("humidity", "Unknown")
        wind_speed = current_condition.get("windspeedKmph", "Unknown")
        wind_dir = current_condition.get("winddir16Point", "Unknown")

        # One line per forecast day, limited to three days.
        forecast_lines = []
        for day in (weather_data.get("weather") or [])[:3]:
            date = day.get("date", "Unknown")
            max_temp_c = day.get("maxtempC", "Unknown")
            min_temp_c = day.get("mintempC", "Unknown")
            first_hour = (day.get("hourly") or [{}])[0]
            desc = (first_hour.get("weatherDesc") or [{}])[0].get("value", "Unknown")
            forecast_lines.append(
                f"- {date}: {desc}, Max: {max_temp_c}°C, Min: {min_temp_c}°C"
            )

        report_lines = [
            f"Weather for {location}:",
            f"Current Conditions: {weather_desc}",
            f"Temperature: {temp_c}°C / {temp_f}°F (Feels like: {feels_like_c}°C)",
            f"Humidity: {humidity}%",
            f"Wind: {wind_speed} km/h, Direction: {wind_dir}",
        ]
        if forecast_lines:
            # Two blank lines separate the current conditions from the forecast.
            report_lines += ["", "", "Forecast for the next few days:", *forecast_lines]

        return "\n".join(report_lines)

    except json.JSONDecodeError:
        # Must come before RequestException: on recent Requests releases the
        # error raised by response.json() is also a RequestException and would
        # otherwise be misreported as a connection error.
        return f"Error fetching weather for {location}: Invalid response from weather service"
    except requests.exceptions.RequestException as e:
        return f"Error fetching weather for {location}: Connection error - {str(e)}"
    except Exception as e:
        return f"Error fetching weather for {location}: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  # Set up the agent with our tools
131
  final_answer = FinalAnswerTool()
@@ -142,10 +274,10 @@ model = HfApiModel(
142
  custom_role_conversions=None,
143
  )
144
 
145
- # Create agent with our tools (now 4 including final_answer)
146
  agent = CodeAgent(
147
  model=model,
148
- tools=[text_analyzer, get_current_time_in_timezone, weather_forecast, final_answer],
149
  max_steps=6,
150
  verbosity_level=1,
151
  grammar=None,
 
2
  import datetime
3
  import pytz
4
  import yaml
5
+ import os
6
+ import re
7
+ import numpy as np
8
+ from typing import List, Optional, Dict, Any
9
+ import io
10
  from tools.final_answer import FinalAnswerTool
11
  from Gradio_UI import GradioUI
12
 
 
68
  except Exception as e:
69
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
70
 
71
+ # Simple vector embedding function using basic word frequency
72
def get_embedding(
    text: str,
    normalize: bool = True,
    vocabulary: Optional[Dict[str, int]] = None,
) -> np.ndarray:
    """Create a simple bag-of-words count vector for ``text``.

    Args:
        text: Text to embed. It is lower-cased and split into ``\\w+`` tokens.
        normalize: When True, scale a non-zero vector to unit L2 norm.
        vocabulary: Optional mapping of word -> vector index shared between
            several calls. Vectors are only comparable (same length, same
            meaning per axis) when they were built with the same vocabulary.
            When omitted, a vocabulary is derived from ``text`` alone in
            first-occurrence order, so the result must not be compared with
            embeddings of other texts.

    Returns:
        A 1-D float array of word counts (at least one element long). Words
        missing from a supplied ``vocabulary`` are ignored.
    """
    words = re.findall(r'\b\w+\b', text.lower())

    if vocabulary is None:
        # dict.fromkeys de-duplicates while keeping first-occurrence order.
        vocabulary = {word: index for index, word in enumerate(dict.fromkeys(words))}

    # Size by the largest index so a sparse or non-contiguous mapping is safe;
    # keep at least one element so empty input still yields a valid vector.
    size = max(1, max(vocabulary.values(), default=-1) + 1)
    vector = np.zeros(size)
    for word in words:
        index = vocabulary.get(word)
        if index is not None:
            vector[index] += 1

    if normalize:
        norm = np.linalg.norm(vector)
        if norm > 0:
            vector = vector / norm

    return vector
95
+
96
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Calculate the cosine similarity between two vectors.

    Args:
        a: First vector.
        b: Second vector; must have the same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b`` as a Python float,
        or 0.0 when either vector has zero length.

    Raises:
        ValueError: Propagated from ``np.dot`` when the shapes do not match.
    """
    # Test the norms, not the element sums: a vector such as [1, -1] sums to
    # zero without being a zero vector.
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        return 0.0
    return float(np.dot(a, b) / norm_product)
102
+
103
def extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
    """Extract the text of every page from an in-memory PDF.

    Args:
        pdf_bytes: Raw content of a PDF document.

    Returns:
        The page texts, each followed by a newline. Failures are reported
        in-band: the result starts with "PDF processing requires" when PyPDF2
        cannot be imported and with "Error extracting text from PDF" for any
        other failure.
    """
    try:
        # PyPDF2 is optional; import lazily so the app still starts without it.
        try:
            import PyPDF2
        except ImportError:
            return "PDF processing requires PyPDF2 library which is not available."

        with io.BytesIO(pdf_bytes) as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # NOTE(review): `or ''` is defensive in case extract_text() yields
            # None for a page; confirm against the installed PyPDF2 version.
            return "".join(
                f"{page.extract_text() or ''}\n" for page in pdf_reader.pages
            )
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
121
+
122
def extract_text_from_pdf(file_path: str) -> str:
    """Extract the text of every page from a PDF file on disk.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The page texts, each followed by a newline. Failures are reported
        in-band: the result starts with "PDF processing requires" when PyPDF2
        cannot be imported and with "Error extracting text from PDF" for any
        other failure (including an unreadable or missing file).
    """
    try:
        # PyPDF2 is optional; import lazily so the app still starts without it.
        try:
            import PyPDF2
        except ImportError:
            return "PDF processing requires PyPDF2 library which is not available."

        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # NOTE(review): `or ''` is defensive in case extract_text() yields
            # None for a page; confirm against the installed PyPDF2 version.
            return "".join(
                f"{page.extract_text() or ''}\n" for page in pdf_reader.pages
            )
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
140
+
141
  @tool
142
def semantic_search(corpus: str, query: str, top_k: int = 3, file_path: Optional[str] = None) -> str:
    """Performs semantic search on a corpus of text or uploaded PDF.

    The corpus is split into sentence-like chunks and each chunk is ranked by
    the cosine similarity between its word counts and the query's word counts.
    Chunks that share no word with the query are not reported.

    Args:
        corpus: The text corpus to search within (could be a large text or list of documents).
            If empty and file_path is provided, will extract text from the PDF.
        query: The search query.
        top_k: Number of top results to return.
        file_path: Optional path to a PDF file to extract text from.

    Returns:
        A formatted, ranked list of the best matching chunks, or a message
        describing why no result could be produced.
    """
    # Imported locally so this block does not depend on the file's import list.
    import math
    from collections import Counter

    try:
        final_corpus = corpus

        # Fall back to the file only when no inline corpus was given.
        if not corpus and file_path:
            if not os.path.exists(file_path):
                return f"File not found: {file_path}"

            if file_path.lower().endswith('.pdf'):
                pdf_text = extract_text_from_pdf(file_path)
                # The PDF helper reports failures in-band through these prefixes.
                if pdf_text.startswith(("Error", "PDF processing requires")):
                    return pdf_text
                final_corpus = pdf_text
            else:
                # Anything that is not a PDF is read as UTF-8 text.
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        final_corpus = f.read()
                except Exception as e:
                    return f"Error reading file: {str(e)}"

        if not final_corpus:
            return "Error: No text corpus provided for search."

        # Split after sentence-ending punctuation; very short fragments are dropped.
        chunks = [
            piece.strip()
            for piece in re.split(r'(?<=[.!?])\s+', final_corpus)
            if len(piece.strip()) > 10
        ]
        if not chunks:
            return "No valid text chunks found in the corpus."

        # Word counts keyed by the word itself give every chunk and the query
        # the same coordinate system. Independently indexed vectors cannot be
        # compared: they differ in length and in what each position means.
        query_counts = Counter(re.findall(r'\b\w+\b', query.lower()))
        query_norm = math.sqrt(sum(count * count for count in query_counts.values()))

        scored = []
        for chunk in chunks:
            chunk_counts = Counter(re.findall(r'\b\w+\b', chunk.lower()))
            chunk_norm = math.sqrt(sum(count * count for count in chunk_counts.values()))
            if query_norm == 0 or chunk_norm == 0:
                continue
            # Counter returns 0 for words the chunk does not contain.
            overlap = sum(count * chunk_counts[word] for word, count in query_counts.items())
            if overlap > 0:
                scored.append((overlap / (query_norm * chunk_norm), chunk))

        # Stable sort: equally scored chunks keep their document order.
        scored.sort(key=lambda item: item[0], reverse=True)

        output = f"Search results for: '{query}'\n\n"
        if not scored:
            return output + "No matching results found."

        # Clamp so a negative top_k cannot slice from the end of the list.
        limit = max(int(top_k), 0)
        for rank, (score, chunk) in enumerate(scored[:limit], start=1):
            # Truncate long chunks for display.
            display_chunk = chunk if len(chunk) <= 200 else chunk[:197] + "..."
            output += f"{rank}. [Score: {score:.2f}] {display_chunk}\n\n"

        return output

    except Exception as e:
        return f"Error performing semantic search: {str(e)}"
217
+
218
+ @tool
219
def list_available_tools() -> str:
    """Describe every tool this agent offers, with example prompts for each."""
    # The Markdown guide is returned verbatim; it takes no input and has no side effects.
    return """
# Available Tools

This agent has the following tools available:

## 1. Text Analyzer

Analyzes text and provides statistics including word count, character count, unique words count, average word length, and most common words.

**Example usage:**
- "Analyze this text: The quick brown fox jumps over the lazy dog."
- "Give me statistics about this paragraph: Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."

## 2. Current Time in Timezone

Fetches the current local time for a specified timezone.

**Example usage:**
- "What time is it in Tokyo?"
- "Get the current time in America/New_York"
- "Tell me the time in UTC"

## 3. Semantic Search

Performs semantic search on a corpus of text or uploaded PDF document to find the most relevant sections matching a query.

**Example usage:**
- "Search for 'climate change' in this text: Global warming is the long-term heating of Earth's surface observed since the pre-industrial period due to human activities, primarily fossil fuel burning, which increases heat-trapping greenhouse gas levels in Earth's atmosphere."
- "If I have uploaded a PDF file called 'research.pdf', search for 'vaccination' in it"
- "Find information about 'neural networks' in this text: [your long text here]"

## How to Use This Agent

1. Type your request in the chat box below
2. The agent will process your request and use the appropriate tool
3. Results will be displayed in this conversation area

For complex tasks, you may need to provide additional context or data. Be as specific as possible in your requests.
"""
261
 
262
  # Set up the agent with our tools
263
  final_answer = FinalAnswerTool()
 
274
  custom_role_conversions=None,
275
  )
276
 
277
+ # Create agent with our tools (including the new list_available_tools)
278
  agent = CodeAgent(
279
  model=model,
280
+ tools=[text_analyzer, get_current_time_in_timezone, semantic_search, list_available_tools, final_answer],
281
  max_steps=6,
282
  verbosity_level=1,
283
  grammar=None,