Final_Assignment_Template

Sleeping

App Files Files Community

huytofu92 committed on May 19

Commit

33b9b1f

1 Parent(s): 8d48756

Remove usage of open() functions

Browse files

Files changed (5) hide show

audio_tools.py +10 -6
community_tools.py +17 -2
mini_agents.py +2 -2
utils.py +0 -22
vlm_tools.py +34 -9

audio_tools.py CHANGED Viewed

@@ -115,12 +115,16 @@ def speaker_diarization(audio: str) -> list:
     """
     # Decode the base64 audio
     audio_data = base64.b64decode(audio)
-    audio_path = "temp_audio.wav"
-    with open(audio_path, "wb") as f:
-        f.write(audio_data)
-    # Perform speaker diarization
-    [flags, classes, centers] = aS.speakerDiarization(audio_path, 2)  # Assuming 2 speakers
     # Process the output
     speaker_segments = []

     """
     # Decode the base64 audio
     audio_data = base64.b64decode(audio)
+    audio_buffer = BytesIO(audio_data)
+    # Create a temporary BytesIO object for processing
+    temp_buffer = BytesIO()
+    audio_segment = AudioSegment.from_file(audio_buffer)
+    audio_segment.export(temp_buffer, format="wav")
+    temp_buffer.seek(0)
+    # Perform speaker diarization using the buffer
+    [flags, classes, centers] = aS.speakerDiarization(temp_buffer, 2)  # Assuming 2 speakers
     # Process the output
     speaker_segments = []

community_tools.py CHANGED Viewed

@@ -3,6 +3,8 @@ from langchain_community.agent_toolkits.load_tools import load_tools
 from langchain_community.document_loaders import YoutubeLoader
 from smolagents.tools import Tool, tool
 from youtube_transcript_api import YouTubeTranscriptApi
 google_map_tool = Tool.from_langchain(GooglePlacesTool())
@@ -18,7 +20,7 @@ community_tools = [google_map_tool, wikipedia_tool, *arxiv_tools]
 @tool
 def get_youtube_transcript_from_url(video_url: str)->str:
     """
-    Get the transcript of a YouTube video
     Args:
         video_url: The URL of the YouTube video (e.g. https://www.youtube.com/watch?v=dQw4w9WgXcQ)
     Returns:
@@ -26,7 +28,20 @@ def get_youtube_transcript_from_url(video_url: str)->str:
     """
     video_id = video_url.split("=")[1]
     try:
-        ytt_api = YouTubeTranscriptApi()
         fetched_transcript = ytt_api.fetch(video_id)
         # is iterable

 from langchain_community.document_loaders import YoutubeLoader
 from smolagents.tools import Tool, tool
 from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api.proxies import WebshareProxyConfig
+import os
 google_map_tool = Tool.from_langchain(GooglePlacesTool())
 @tool
 def get_youtube_transcript_from_url(video_url: str)->str:
     """
+    Get the transcript of a YouTube video using proxy configuration
     Args:
         video_url: The URL of the YouTube video (e.g. https://www.youtube.com/watch?v=dQw4w9WgXcQ)
     Returns:
     """
     video_id = video_url.split("=")[1]
     try:
+        # Get proxy credentials from environment variables
+        proxy_username = os.getenv("WEBSHARE_PROXY_USERNAME")
+        proxy_password = os.getenv("WEBSHARE_PROXY_PASSWORD")
+        # Configure proxy if credentials are available
+        if proxy_username and proxy_password:
+            proxy_config = WebshareProxyConfig(
+                proxy_username=proxy_username,
+                proxy_password=proxy_password,
+            )
+            ytt_api = YouTubeTranscriptApi(proxy_config=proxy_config)
+        else:
+            ytt_api = YouTubeTranscriptApi()
         fetched_transcript = ytt_api.fetch(video_id)
         # is iterable

mini_agents.py CHANGED Viewed

@@ -41,7 +41,7 @@ audio_agent = CodeAgent(
     tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
     max_steps=6,
     # prompt_templates=PROMPT_TEMPLATE["audio_agent"],
-    additional_authorized_imports=["pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
     name="audio_agent",
     description="This agent is responsible for rocessing audio, transcribing audio and extracting text from it."
 )
@@ -56,7 +56,7 @@ vlm_agent = CodeAgent(
     tools=[download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video, get_image_from_file],
     max_steps=6,
     # prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
-    additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
     name="vlm_agent",
     description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
 )

     tools=[transcribe_audio_tool, audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
     max_steps=6,
     # prompt_templates=PROMPT_TEMPLATE["audio_agent"],
+    additional_authorized_imports=["pytube", "pydub", "pyAudioAnalysis", "base64", "io", "sklearn", "scipy", "numpy", "pandas", "json", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle'],
     name="audio_agent",
     description="This agent is responsible for rocessing audio, transcribing audio and extracting text from it."
 )
     tools=[download_image, image_processing, object_detection_tool, ocr_scan_tool, extract_frames_from_video, get_image_from_file],
     max_steps=6,
     # prompt_templates=PROMPT_TEMPLATE["vlm_agent"],
+    additional_authorized_imports=["cv2", "numpy", "pytesseract", "requests", "base64", "onnxruntime", "PIL", "io", "os", "logging", "yaml", "pyplot", "matplotlib", 'hmmlearn', 'pickle', 'youtube_dl', 'bs4'],
     name="vlm_agent",
     description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
 )

utils.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import json
 import os
 from typing import Optional
-from smolagents.tools import tool
 def find_file_by_task_id(task_id: str, metadata_path: str = "Final_Assignment_Template/validation/metadata.jsonl") -> Optional[str]:
     """
@@ -58,24 +57,3 @@ def get_full_file_path(task_id: str, base_dir: str = "Final_Assignment_Template/
     full_path = os.path.join(base_dir, filename)
     return full_path if os.path.exists(full_path) else None
-@tool
-def load_file_from_task_id(task_id: str) -> str:
-    """
-    Load a file related to a given task_id if it exists.
-    Args:
-        task_id: The task_id to load the file for
-    Returns:
-        The file content if found, None if not found
-    """
-    file_path = get_full_file_path(task_id)
-    if not file_path:
-        return "File not found"
-    with open(file_path, 'r') as file:
-        try:
-            return file.read()
-        except Exception as e:
-            current_dir = os.path.dirname(os.path.abspath(__file__))
-            file_path = os.path.join(current_dir, file_path.replace("Final_Assignment_Template", ""))
-            with open(file_path, 'r') as file:
-                return file.read()

 import json
 import os
 from typing import Optional
 def find_file_by_task_id(task_id: str, metadata_path: str = "Final_Assignment_Template/validation/metadata.jsonl") -> Optional[str]:
     """
     full_path = os.path.join(base_dir, filename)
     return full_path if os.path.exists(full_path) else None

vlm_tools.py CHANGED Viewed

@@ -114,14 +114,28 @@ def get_image_from_file(file_path: str)->str:
         The image as a base64 string
     """
     try:
-        with open(file_path, 'rb') as image_file:
-            image = base64.b64encode(image_file.read()).decode('utf-8')
     except Exception as e:
         current_file_path = os.path.abspath(__file__)
         current_file_dir = os.path.dirname(current_file_path)
         file_path = os.path.join(current_file_dir, file_path.replace("Final_Assignment_Template", ""))
-        with open(file_path, 'rb') as image_file:
-            image = base64.b64encode(image_file.read()).decode('utf-8')
     return image
 @tool
@@ -167,12 +181,23 @@ class ObjectDetectionTool(Tool):
         self.onnx_path = onnx_path
         self.names_path = names_path
         self.onnx_model = onnxruntime.InferenceSession(self.onnx_path)
     def forward(self, frames: any)->any:
-        # Load class labels
-        with open(self.names_path, 'r') as f:
-            classes = [line.strip() for line in f.readlines()]
         detected_objects = []
         for frame in frames:
             img = pre_processing(frame)
@@ -182,7 +207,7 @@ class ObjectDetectionTool(Tool):
             onnx_input = {self.onnx_model.get_inputs()[0].name: blob}
             onnx_output = self.onnx_model.run(None, onnx_input)
-            detected_objects.append(post_processing(onnx_output, classes, img.shape))
         return detected_objects

         The image as a base64 string
     """
     try:
+        # Use BytesIO to read the file
+        with BytesIO() as buffer:
+            # Use cv2 to read the image
+            img = cv2.imread(file_path)
+            if img is None:
+                raise FileNotFoundError(f"Could not read image at {file_path}")
+            # Encode to jpg and write to buffer
+            _, buffer_data = cv2.imencode('.jpg', img)
+            buffer.write(buffer_data.tobytes())
+            image = base64.b64encode(buffer.getvalue()).decode('utf-8')
     except Exception as e:
         current_file_path = os.path.abspath(__file__)
         current_file_dir = os.path.dirname(current_file_path)
         file_path = os.path.join(current_file_dir, file_path.replace("Final_Assignment_Template", ""))
+        # Try again with the new path
+        with BytesIO() as buffer:
+            img = cv2.imread(file_path)
+            if img is None:
+                raise FileNotFoundError(f"Could not read image at {file_path}")
+            _, buffer_data = cv2.imencode('.jpg', img)
+            buffer.write(buffer_data.tobytes())
+            image = base64.b64encode(buffer.getvalue()).decode('utf-8')
     return image
 @tool
         self.onnx_path = onnx_path
         self.names_path = names_path
         self.onnx_model = onnxruntime.InferenceSession(self.onnx_path)
+        # Load class labels - using a predefined list since we can't use open()
+        # These are the standard COCO dataset classes that YOLOv3 uses
+        self.classes = [
+            'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
+            'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
+            'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
+            'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
+            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+            'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
+            'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+            'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
+            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
+        ]
     def forward(self, frames: any)->any:
         detected_objects = []
         for frame in frames:
             img = pre_processing(frame)
             onnx_input = {self.onnx_model.get_inputs()[0].name: blob}
             onnx_output = self.onnx_model.run(None, onnx_input)
+            detected_objects.append(post_processing(onnx_output, self.classes, img.shape))
         return detected_objects