huytofu92 committed on
Commit
33b9b1f
·
1 Parent(s): 8d48756

Remove usage of open() functions

Browse files
Files changed (5) hide show
  1. audio_tools.py +10 -6
  2. community_tools.py +17 -2
  3. mini_agents.py +2 -2
  4. utils.py +0 -22
  5. vlm_tools.py +34 -9
audio_tools.py CHANGED
@@ -115,12 +115,16 @@ def speaker_diarization(audio: str) -> list:
115
  """
116
  # Decode the base64 audio
117
  audio_data = base64.b64decode(audio)
118
- audio_path = "temp_audio.wav"
119
- with open(audio_path, "wb") as f:
120
- f.write(audio_data)
121
-
122
- # Perform speaker diarization
123
- [flags, classes, centers] = aS.speakerDiarization(audio_path, 2) # Assuming 2 speakers
 
 
 
 
124
 
125
  # Process the output
126
  speaker_segments = []
 
115
  """
116
  # Decode the base64 audio
117
  audio_data = base64.b64decode(audio)
118
+ audio_buffer = BytesIO(audio_data)
119
+
120
+ # Create a temporary BytesIO object for processing
121
+ temp_buffer = BytesIO()
122
+ audio_segment = AudioSegment.from_file(audio_buffer)
123
+ audio_segment.export(temp_buffer, format="wav")
124
+ temp_buffer.seek(0)
125
+
126
+ # Perform speaker diarization using the buffer
127
+ [flags, classes, centers] = aS.speakerDiarization(temp_buffer, 2) # Assuming 2 speakers
128
 
129
  # Process the output
130
  speaker_segments = []
community_tools.py CHANGED
@@ -3,6 +3,8 @@ from langchain_community.agent_toolkits.load_tools import load_tools
3
  from langchain_community.document_loaders import YoutubeLoader
4
  from smolagents.tools import Tool, tool
5
  from youtube_transcript_api import YouTubeTranscriptApi
 
 
6
 
7
  google_map_tool = Tool.from_langchain(GooglePlacesTool())
8
 
@@ -18,7 +20,7 @@ community_tools = [google_map_tool, wikipedia_tool, *arxiv_tools]
18
  @tool
19
  def get_youtube_transcript_from_url(video_url: str)->str:
20
  """
21
- Get the transcript of a YouTube video
22
  Args:
23
  video_url: The URL of the YouTube video (e.g. https://www.youtube.com/watch?v=dQw4w9WgXcQ)
24
  Returns:
@@ -26,7 +28,20 @@ def get_youtube_transcript_from_url(video_url: str)->str:
26
  """
27
  video_id = video_url.split("=")[1]
28
  try:
29
- ytt_api = YouTubeTranscriptApi()
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  fetched_transcript = ytt_api.fetch(video_id)
31
 
32
  # is iterable
 
3
  from langchain_community.document_loaders import YoutubeLoader
4
  from smolagents.tools import Tool, tool
5
  from youtube_transcript_api import YouTubeTranscriptApi
6
+ from youtube_transcript_api.proxies import WebshareProxyConfig
7
+ import os
8
 
9
  google_map_tool = Tool.from_langchain(GooglePlacesTool())
10
 
 
20
  @tool
21
  def get_youtube_transcript_from_url(video_url: str)->str:
22
  """
23
+ Get the transcript of a YouTube video using proxy configuration
24
  Args:
25
  video_url: The URL of the YouTube video (e.g. https://www.youtube.com/watch?v=dQw4w9WgXcQ)
26
  Returns:
 
28
  """
29
  video_id = video_url.split("=")[1]
30
  try:
31
+ # Get proxy credentials from environment variables
32
+ proxy_username = os.getenv("WEBSHARE_PROXY_USERNAME")
33
+ proxy_password = os.getenv("WEBSHARE_PROXY_PASSWORD")
34
+
35
+ # Configure proxy if credentials are available
36
+ if proxy_username and proxy_password:
37
+ proxy_config = WebshareProxyConfig(
38
+ proxy_username=proxy_username,
39
+ proxy_password=proxy_password,
40
+ )
41
+ ytt_api = YouTubeTranscriptApi(proxy_config=proxy_config)
42
+ else:
43
+ ytt_api = YouTubeTranscriptApi()
44
+
45
  fetched_transcript = ytt_api.fetch(video_id)
46
 
47
  # is iterable
mini_agents.py CHANGED
@@ -41,7 +41,7 @@ audio_agent = CodeAgent(
41
  tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
42
  max_steps=6,
43
  # prompt_templates=PROMPT_TEMPLATE["audio_agent"],
44
- additional_authorized_imports=["pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
45
  name="audio_agent",
46
  description="This agent is responsible for rocessing audio, transcribing audio and extracting text from it."
47
  )
@@ -56,7 +56,7 @@ vlm_agent = CodeAgent(
56
  tools=[download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video, get_image_from_file],
57
  max_steps=6,
58
  # prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
59
- additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
60
  name="vlm_agent",
61
  description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
62
  )
 
41
  tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
42
  max_steps=6,
43
  # prompt_templates=PROMPT_TEMPLATE["audio_agent"],
44
+ additional_authorized_imports=["pytube", "pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
45
  name="audio_agent",
46
  description="This agent is responsible for rocessing audio, transcribing audio and extracting text from it."
47
  )
 
56
  tools=[download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video, get_image_from_file],
57
  max_steps=6,
58
  # prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
59
+ additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'youtube_dl', 'bs4'],
60
  name="vlm_agent",
61
  description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
62
  )
utils.py CHANGED
@@ -1,7 +1,6 @@
1
  import json
2
  import os
3
  from typing import Optional
4
- from smolagents.tools import tool
5
 
6
  def find_file_by_task_id(task_id: str, metadata_path: str = "Final_Assignment_Template/validation/metadata.jsonl") -> Optional[str]:
7
  """
@@ -58,24 +57,3 @@ def get_full_file_path(task_id: str, base_dir: str = "Final_Assignment_Template/
58
 
59
  full_path = os.path.join(base_dir, filename)
60
  return full_path if os.path.exists(full_path) else None
61
-
62
- @tool
63
- def load_file_from_task_id(task_id: str) -> str:
64
- """
65
- Load a file related to a given task_id if it exists.
66
- Args:
67
- task_id: The task_id to load the file for
68
- Returns:
69
- The file content if found, None if not found
70
- """
71
- file_path = get_full_file_path(task_id)
72
- if not file_path:
73
- return "File not found"
74
- with open(file_path, 'r') as file:
75
- try:
76
- return file.read()
77
- except Exception as e:
78
- current_dir = os.path.dirname(os.path.abspath(__file__))
79
- file_path = os.path.join(current_dir, file_path.replace("Final_Assignment_Template", ""))
80
- with open(file_path, 'r') as file:
81
- return file.read()
 
1
  import json
2
  import os
3
  from typing import Optional
 
4
 
5
  def find_file_by_task_id(task_id: str, metadata_path: str = "Final_Assignment_Template/validation/metadata.jsonl") -> Optional[str]:
6
  """
 
57
 
58
  full_path = os.path.join(base_dir, filename)
59
  return full_path if os.path.exists(full_path) else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vlm_tools.py CHANGED
@@ -114,14 +114,28 @@ def get_image_from_file(file_path: str)->str:
114
  The image as a base64 string
115
  """
116
  try:
117
- with open(file_path, 'rb') as image_file:
118
- image = base64.b64encode(image_file.read()).decode('utf-8')
 
 
 
 
 
 
 
 
119
  except Exception as e:
120
  current_file_path = os.path.abspath(__file__)
121
  current_file_dir = os.path.dirname(current_file_path)
122
  file_path = os.path.join(current_file_dir, file_path.replace("Final_Assignment_Template", ""))
123
- with open(file_path, 'rb') as image_file:
124
- image = base64.b64encode(image_file.read()).decode('utf-8')
 
 
 
 
 
 
125
  return image
126
 
127
  @tool
@@ -167,12 +181,23 @@ class ObjectDetectionTool(Tool):
167
  self.onnx_path = onnx_path
168
  self.names_path = names_path
169
  self.onnx_model = onnxruntime.InferenceSession(self.onnx_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  def forward(self, frames: any)->any:
172
- # Load class labels
173
- with open(self.names_path, 'r') as f:
174
- classes = [line.strip() for line in f.readlines()]
175
-
176
  detected_objects = []
177
  for frame in frames:
178
  img = pre_processing(frame)
@@ -182,7 +207,7 @@ class ObjectDetectionTool(Tool):
182
  onnx_input = {self.onnx_model.get_inputs()[0].name: blob}
183
  onnx_output = self.onnx_model.run(None, onnx_input)
184
 
185
- detected_objects.append(post_processing(onnx_output, classes, img.shape))
186
 
187
  return detected_objects
188
 
 
114
  The image as a base64 string
115
  """
116
  try:
117
+ # Use BytesIO to read the file
118
+ with BytesIO() as buffer:
119
+ # Use cv2 to read the image
120
+ img = cv2.imread(file_path)
121
+ if img is None:
122
+ raise FileNotFoundError(f"Could not read image at {file_path}")
123
+ # Encode to jpg and write to buffer
124
+ _, buffer_data = cv2.imencode('.jpg', img)
125
+ buffer.write(buffer_data.tobytes())
126
+ image = base64.b64encode(buffer.getvalue()).decode('utf-8')
127
  except Exception as e:
128
  current_file_path = os.path.abspath(__file__)
129
  current_file_dir = os.path.dirname(current_file_path)
130
  file_path = os.path.join(current_file_dir, file_path.replace("Final_Assignment_Template", ""))
131
+ # Try again with the new path
132
+ with BytesIO() as buffer:
133
+ img = cv2.imread(file_path)
134
+ if img is None:
135
+ raise FileNotFoundError(f"Could not read image at {file_path}")
136
+ _, buffer_data = cv2.imencode('.jpg', img)
137
+ buffer.write(buffer_data.tobytes())
138
+ image = base64.b64encode(buffer.getvalue()).decode('utf-8')
139
  return image
140
 
141
  @tool
 
181
  self.onnx_path = onnx_path
182
  self.names_path = names_path
183
  self.onnx_model = onnxruntime.InferenceSession(self.onnx_path)
184
+
185
+ # Load class labels - using a predefined list since we can't use open()
186
+ # These are the standard COCO dataset classes that YOLOv3 uses
187
+ self.classes = [
188
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
189
+ 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
190
+ 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
191
+ 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
192
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
193
+ 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
194
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
195
+ 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
196
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
197
+ 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
198
+ ]
199
 
200
  def forward(self, frames: any)->any:
 
 
 
 
201
  detected_objects = []
202
  for frame in frames:
203
  img = pre_processing(frame)
 
207
  onnx_input = {self.onnx_model.get_inputs()[0].name: blob}
208
  onnx_output = self.onnx_model.run(None, onnx_input)
209
 
210
+ detected_objects.append(post_processing(onnx_output, self.classes, img.shape))
211
 
212
  return detected_objects
213