huytofu92 committed on
Commit
0db692a
·
1 Parent(s): dde4764

second version

Browse files
Files changed (6) hide show
  1. .gitignore +7 -0
  2. app.py +50 -45
  3. audio_tools.py +95 -0
  4. mini_agents.py +75 -13
  5. tools.py +173 -3
  6. vlm_tools.py +79 -7
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ *.pyw
7
+ *.pyz
app.py CHANGED
@@ -19,7 +19,7 @@ class BasicAgent:
19
  print(f"Agent returning fixed answer: {fixed_answer}")
20
  return fixed_answer
21
 
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
@@ -62,9 +62,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -84,8 +84,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
@@ -98,46 +98,51 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
98
 
99
  # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
- try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
- final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
- )
112
- print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
- except requests.exceptions.HTTPError as e:
116
- error_detail = f"Server responded with status {e.response.status_code}."
117
  try:
118
- error_json = e.response.json()
119
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
- error_detail += f" Response: {e.response.text[:500]}"
122
- status_message = f"Submission Failed: {error_detail}"
123
- print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
- except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
- except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
- except Exception as e:
137
- status_message = f"An unexpected error occurred during submission: {e}"
138
- print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
 
143
  # --- Build Gradio Interface using Blocks ---
 
19
  print(f"Agent returning fixed answer: {fixed_answer}")
20
  return fixed_answer
21
 
22
+ def run_and_submit_all( profile: gr.OAuthProfile | None, mock_submission: bool = True):
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
 
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
+ print(f"Error decoding JSON response from questions endpoint: {e}")
66
+ print(f"Response text: {response.text[:500]}")
67
+ return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
 
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
87
+ print(f"Error running agent on task {task_id}: {e}")
88
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
 
98
 
99
  # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
+ if mock_submission:
102
+ answer_df = pd.DataFrame(results_log, columns=["Task ID", "Question", "Submitted Answer"])
103
+ answer_df.to_csv("answers.csv", index=False)
104
+ return "Answers saved to answers.csv", answer_df
105
+ else:
 
 
 
 
 
 
 
 
 
 
 
106
  try:
107
+ response = requests.post(submit_url, json=submission_data, timeout=60)
108
+ response.raise_for_status()
109
+ result_data = response.json()
110
+ final_status = (
111
+ f"Submission Successful!\n"
112
+ f"User: {result_data.get('username')}\n"
113
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
114
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
115
+ f"Message: {result_data.get('message', 'No message received.')}"
116
+ )
117
+ print("Submission successful.")
118
+ results_df = pd.DataFrame(results_log)
119
+ return final_status, results_df
120
+ except requests.exceptions.HTTPError as e:
121
+ error_detail = f"Server responded with status {e.response.status_code}."
122
+ try:
123
+ error_json = e.response.json()
124
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
125
+ except requests.exceptions.JSONDecodeError:
126
+ error_detail += f" Response: {e.response.text[:500]}"
127
+ status_message = f"Submission Failed: {error_detail}"
128
+ print(status_message)
129
+ results_df = pd.DataFrame(results_log)
130
+ return status_message, results_df
131
+ except requests.exceptions.Timeout:
132
+ status_message = "Submission Failed: The request timed out."
133
+ print(status_message)
134
+ results_df = pd.DataFrame(results_log)
135
+ return status_message, results_df
136
+ except requests.exceptions.RequestException as e:
137
+ status_message = f"Submission Failed: Network error - {e}"
138
+ print(status_message)
139
+ results_df = pd.DataFrame(results_log)
140
+ return status_message, results_df
141
+ except Exception as e:
142
+ status_message = f"An unexpected error occurred during submission: {e}"
143
+ print(status_message)
144
+ results_df = pd.DataFrame(results_log)
145
+ return status_message, results_df
146
 
147
 
148
  # --- Build Gradio Interface using Blocks ---
audio_tools.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+ from pydub import AudioSegment
3
+ from pyAudioAnalysis import audioSegmentation as aS
4
+ import base64
5
+ from io import BytesIO
6
+
7
+ @tool
8
def audio_to_base64(file_path: str) -> str:
    """
    Load an audio file and return its contents as base64-encoded WAV data
    Args:
        file_path: Location of the audio file to read
    Returns:
        Base64 text of the audio after it has been exported as WAV
    """
    wav_bytes = BytesIO()
    # The export format is fixed to WAV regardless of the input file
    AudioSegment.from_file(file_path).export(wav_bytes, format="wav")
    encoded = base64.b64encode(wav_bytes.getvalue())
    return encoded.decode('utf-8')
26
+
27
+ @tool
28
def noise_reduction(audio: str) -> str:
    """
    Apply a simple low-pass filter to base64-encoded audio
    Args:
        audio: The audio file in base64 format
    Returns:
        The filtered audio, exported as WAV and encoded in base64
    """
    source = AudioSegment.from_file(BytesIO(base64.b64decode(audio)))

    # A 3000 cutoff low-pass filter stands in for real noise reduction
    filtered = source.low_pass_filter(3000)

    out = BytesIO()
    filtered.export(out, format="wav")
    return base64.b64encode(out.getvalue()).decode('utf-8')
47
+
48
+ @tool
49
def audio_segmentation(audio: str, segment_length: int = 30) -> list:
    """
    Segment an audio file into smaller chunks
    Args:
        audio: The audio file in base64 format
        segment_length: Length of each segment in seconds. Must be positive.
    Returns:
        List of audio segments in base64 format (each exported as WAV)
    Raises:
        ValueError: If segment_length is zero or negative
    """
    # Validate first: a zero step makes range() fail with an unrelated message
    # and a negative step would silently produce no segments at all.
    if segment_length <= 0:
        raise ValueError("segment_length must be a positive number of seconds")

    # Decode the base64 audio
    audio_data = base64.b64decode(audio)
    audio_segment = AudioSegment.from_file(BytesIO(audio_data))

    # Slice positions are expressed in milliseconds (seconds * 1000)
    step_ms = segment_length * 1000
    segments = []
    for start in range(0, len(audio_segment), step_ms):
        chunk = audio_segment[start:start + step_ms]
        buffer = BytesIO()
        chunk.export(buffer, format="wav")
        segments.append(base64.b64encode(buffer.getvalue()).decode('utf-8'))

    return segments
71
+
72
+ @tool
73
def speaker_diarization(audio: str) -> list:
    """
    Diarize an audio file into speakers
    Args:
        audio: The audio file in base64 format
    Returns:
        List of (index, speaker flag) tuples, one per flag returned by the
        diarization call
    """
    # Local imports keep this function self-contained
    import os
    import tempfile

    # Decode the base64 audio
    audio_data = base64.b64decode(audio)

    # Use a unique temporary file rather than a fixed "temp_audio.wav" so that
    # concurrent calls cannot overwrite each other's input.
    # NOTE(review): the decoded bytes are written as-is with a .wav suffix;
    # presumably callers always pass WAV data - confirm.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio_data)
        audio_path = tmp.name

    try:
        # Perform speaker diarization
        [flags, classes, centers] = aS.speakerDiarization(audio_path, 2)  # Assuming 2 speakers
    finally:
        # Always remove the temporary file, even if diarization fails
        os.remove(audio_path)

    # Pair every flag with its position in the sequence
    return list(enumerate(flags))
mini_agents.py CHANGED
@@ -1,17 +1,17 @@
1
  from smolagents import CodeAgent, InferenceClientModel
2
- from tools import sort_list
 
 
 
3
  import os
4
 
5
  MODEL_CHOICES = {
6
  "audio": ["whisper-large-v3"],
7
- "vlm": ["Salesforce/blip-image-captioning-base", "smolvlm/vlm-base-patch14-224"],
8
- "code": ["gpt-4o-mini"]}
9
-
10
- code_agent = CodeAgent(
11
- model=MODEL_CHOICES["code"][0],
12
- tools=[sort_list],
13
- verbose=True
14
- )
15
 
16
  audio_model = InferenceClientModel(
17
  model=MODEL_CHOICES["audio"][0],
@@ -21,8 +21,11 @@ audio_model = InferenceClientModel(
21
 
22
  audio_agent = CodeAgent(
23
  model=audio_model,
24
- tools=[],
25
- verbose=True
 
 
 
26
  )
27
 
28
  vlm_model = InferenceClientModel(
@@ -33,13 +36,72 @@ vlm_model = InferenceClientModel(
33
 
34
  vlm_agent = CodeAgent(
35
  model=vlm_model,
36
- tools=[],
37
- verbose=True
 
 
 
38
  )
39
 
 
 
 
 
 
40
 
 
 
 
 
 
 
 
 
41
 
 
 
 
 
 
42
 
 
 
 
 
 
 
 
 
43
 
 
 
 
 
 
 
 
 
 
 
44
 
 
 
 
 
 
 
 
 
 
 
45
 
 
 
 
 
 
 
 
 
 
 
 
1
  from smolagents import CodeAgent, InferenceClientModel
2
+ from tools import sort_list, operate_two_numbers, convert_number
3
+ from tools import to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby
4
+ from vlm_tools import download_image, image_processing, object_detection, ocr_scan
5
+ from audio_tools import audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization
6
  import os
7
 
8
  MODEL_CHOICES = {
9
  "audio": ["whisper-large-v3"],
10
+ "vlm": ["Qwen/Qwen2.5-VL-7B-Instruct"],
11
+ "code": ["Qwen/Qwen2.5-Coder-32B-Instruct"],
12
+ "arithmetic": ["Qwen/Qwen2.5-Coder-7B-Instruct"],
13
+ "pandas": ["Qwen/Qwen2.5-Coder-7B-Instruct"]
14
+ }
 
 
 
15
 
16
  audio_model = InferenceClientModel(
17
  model=MODEL_CHOICES["audio"][0],
 
21
 
22
  audio_agent = CodeAgent(
23
  model=audio_model,
24
+ tools=[audio_to_base64, noise_reduction, audio_segmentation, speaker_diarization],
25
+ verbose=True,
26
+ max_steps=4,
27
+ name="Audio Agent",
28
+ description="This agent is responsible for rocessing audio, transcribing audio and extracting text from it."
29
  )
30
 
31
  vlm_model = InferenceClientModel(
 
36
 
37
  vlm_agent = CodeAgent(
38
  model=vlm_model,
39
+ tools=[download_image, image_processing, object_detection, ocr_scan],
40
+ verbose=True,
41
+ max_steps=4,
42
+ name="VLM Agent",
43
+ description="This agent is responsible for downloading images, processing images, detecting objects in them and extracting text from them."
44
  )
45
 
46
+ arithmetic_model = InferenceClientModel(
47
+ model=MODEL_CHOICES["arithmetic"][0],
48
+ api_key=os.getenv("HUGGINGFACE_API_KEY"),
49
+ api_url="https://api.openai.com/v1/chat/completions"
50
+ )
51
 
52
+ arithmetic_agent = CodeAgent(
53
+ model=arithmetic_model,
54
+ tools=[operate_two_numbers, convert_number],
55
+ verbose=True,
56
+ max_steps=4,
57
+ name="Arithmetic Agent",
58
+ description="This agent is responsible for performing arithmetic operations on two numbers."
59
+ )
60
 
61
+ pandas_model = InferenceClientModel(
62
+ model=MODEL_CHOICES["pandas"][0],
63
+ api_key=os.getenv("HUGGINGFACE_API_KEY"),
64
+ api_url="https://api.openai.com/v1/chat/completions"
65
+ )
66
 
67
+ pandas_agent = CodeAgent(
68
+ model=pandas_model,
69
+ tools=[to_dataframe, to_json, get_dataframe_data, get_dataframe_column, get_dataframe_row, get_dataframe_groupby],
70
+ verbose=True,
71
+ max_steps=4,
72
+ name="Pandas Agent",
73
+ description="This agent is responsible for converting data to a dataframe, performing pandas operations on such dataframe and converting the dataframe back to a json or a csv file."
74
+ )
75
 
76
+ multimodal_manager = CodeAgent(
77
+ model=MODEL_CHOICES["code"][0],
78
+ managed_agents=[audio_agent, vlm_agent],
79
+ tools=[sort_list],
80
+ verbose=True,
81
+ max_steps=8,
82
+ planning_steps=4,
83
+ name="Multimodal Manager",
84
+ description="This agent is responsible for managing the audio and vlm agents."
85
+ )
86
 
87
+ operation_manager = CodeAgent(
88
+ model=MODEL_CHOICES["code"][0],
89
+ managed_agents=[arithmetic_agent, pandas_agent],
90
+ tools=[sort_list],
91
+ verbose=True,
92
+ max_steps=8,
93
+ planning_steps=4,
94
+ name="Operation Manager",
95
+ description="This agent is responsible for managing the arithmetic and pandas agents."
96
+ )
97
 
98
+ master_agent = CodeAgent(
99
+ model=MODEL_CHOICES["code"][0],
100
+ managed_agents=[multimodal_manager, operation_manager],
101
+ tools=[sort_list],
102
+ verbose=True,
103
+ max_steps=16,
104
+ planning_steps=4,
105
+ name="Master Agent",
106
+ description="This agent is responsible for managing the multimodal and operation managers."
107
+ )
tools.py CHANGED
@@ -1,10 +1,11 @@
1
  from langchain_core.tools import tool
2
  from datetime import datetime
3
- from typing import Literal, List
4
  from smolagents import WebSearchTool, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool
 
5
 
6
  @tool
7
- def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S"):
8
  """
9
  Get the current time
10
  Args:
@@ -16,7 +17,7 @@ def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d
16
  return datetime.now(timezone).strftime(format)
17
 
18
  @tool
19
- def sort_list(my_list: List[int], order: Literal["asc", "desc", "alphabetize", "alphabetize_reverse"]):
20
  """
21
  Sort a list in ascending or descending order if the list contains numbers.
22
  Sort it in alphabetically or alphabetically in reverse order if the list contains strings or mixed types.
@@ -61,3 +62,172 @@ duckduckgo_search_tool = DuckDuckGoSearchTool()
61
  visit_webpage_tool = VisitWebpageTool()
62
  wikipedia_search_tool = WikipediaSearchTool()
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain_core.tools import tool
2
  from datetime import datetime
3
+ from typing import Literal, List, Union
4
  from smolagents import WebSearchTool, DuckDuckGoSearchTool, VisitWebpageTool, WikipediaSearchTool
5
+ import pandas as pd
6
 
7
  @tool
8
+ def get_current_time(timezone: str = "America/New_York", format: str = "%Y-%m-%d %H:%M:%S")->str:
9
  """
10
  Get the current time
11
  Args:
 
17
  return datetime.now(timezone).strftime(format)
18
 
19
  @tool
20
+ def sort_list(my_list: List[int], order: Literal["asc", "desc", "alphabetize", "alphabetize_reverse"])->List[int]:
21
  """
22
  Sort a list in ascending or descending order if the list contains numbers.
23
  Sort it in alphabetically or alphabetically in reverse order if the list contains strings or mixed types.
 
62
  visit_webpage_tool = VisitWebpageTool()
63
  wikipedia_search_tool = WikipediaSearchTool()
64
 
65
+ @tool
66
def operate_two_numbers(num1: float, num2: float, operation: Literal["add", "subtract", "multiply", "divide", "power", "modulo"], decimal_places: int = 2)->float:
    """
    Operate on two numbers
    Args:
        num1: The first number to operate on. Must be a float.
        num2: The second number to operate on. Must be a float.
        operation: The operation to perform. Must be one of the following:
            - "add": Add the two numbers
            - "subtract": Subtract the two numbers
            - "multiply": Multiply the two numbers
            - "divide": Divide the two numbers
            - "power": Raise the first number to the power of the second number
            - "modulo": Return the remainder of the division of the first number by the second number
        decimal_places: The number of decimal places to round the result to. Default is 2.
    Returns:
        The result of the operation, rounded to decimal_places
    Raises:
        ValueError: If operation is not supported, or if the result is not a real number
        ZeroDivisionError: If dividing or taking the modulo by zero
    """
    import operator  # local import keeps this function self-contained

    operations = {
        "add": operator.add,
        "subtract": operator.sub,
        "multiply": operator.mul,
        "divide": operator.truediv,
        "power": operator.pow,
        "modulo": operator.mod,
    }
    if operation not in operations:
        raise ValueError("operation must be one of the following: add, subtract, multiply, divide, power, modulo")

    if operation in ("divide", "modulo") and num2 == 0:
        raise ZeroDivisionError(f"cannot {operation} by zero: num2 must be non-zero")

    result = operations[operation](num1, num2)

    # A negative base with a fractional exponent produces a complex number,
    # which round() cannot handle; report it clearly instead.
    if isinstance(result, complex):
        raise ValueError("the result is not a real number (negative base with a fractional exponent)")

    return round(result, decimal_places)
97
+
98
+ @tool
99
def convert_number(orig_num: Union[float, int], operation: Literal["to_base", "type_cast"], new_base: Literal["binary", "octal", "hexadecimal", "int", "float"], decimal_places: int = 2)->Union[int, float, str]:
    """
    Convert a number to a new base or a new numeric type
    Args:
        orig_num: The number to convert. Must be a float or int.
            For "to_base" the value must be a whole number (e.g. 8 or 8.0).
        operation: The operation to perform. Must be one of the following:
            - "to_base": Convert the number to a new base.
            - "type_cast": Convert the number to a new type.
        new_base: The new base to convert the number to. Must be one of the following:
            - "binary": Convert the number to binary.
            - "octal": Convert the number to octal.
            - "hexadecimal": Convert the number to hexadecimal.
            - "int": Convert the number to an int.
            - "float": Convert the number to a float.
        decimal_places: The number of decimal places to round the result to. Default is 2. Only used if operation is "type_cast" and new_base is "float".
    Returns:
        For "to_base", the prefixed string produced by bin/oct/hex (e.g. "0b1010").
        For "type_cast", the converted int or float.
    Raises:
        ValueError: If operation or new_base is not valid for the request,
            or if a fractional number is given for "to_base"
    """
    if operation == "to_base":
        base_converters = {"binary": bin, "octal": oct, "hexadecimal": hex}
        if new_base not in base_converters:
            raise ValueError("new_base must be one of the following: binary, octal, hexadecimal")
        # bin/oct/hex only accept integers, so whole-valued floats are
        # converted first and fractional values are rejected explicitly.
        if isinstance(orig_num, float):
            if not orig_num.is_integer():
                raise ValueError("orig_num must be a whole number to convert it to another base")
            orig_num = int(orig_num)
        return base_converters[new_base](orig_num)

    if operation == "type_cast":
        if new_base == "int":
            return int(orig_num)
        if new_base == "float":
            return round(float(orig_num), decimal_places)
        raise ValueError("new_base must be one of the following: int, float")

    raise ValueError("operation must be one of the following: to_base, type_cast")
135
+
136
+ @tool
137
def to_dataframe(data: List[dict], columns: Union[List[str], None] = None)->pd.DataFrame:
    """
    Convert a list of dictionaries to a pandas DataFrame
    Args:
        data: The records to convert, one dictionary per row.
        columns: The columns to include, in the desired order. Optional; when
            omitted the columns are taken from the dictionaries' keys.
    Returns:
        The pandas DataFrame built from the records
    """
    return pd.DataFrame(data, columns=columns)
142
+
143
+ @tool
144
def to_json(data: pd.DataFrame)->str:
    """
    Serialize a pandas DataFrame as a JSON string with one object per row
    """
    json_text = data.to_json(orient="records")
    return json_text
149
+
150
+ @tool
151
def get_dataframe_data(data: pd.DataFrame, column: Union[str, int], row: Union[str, int])->Union[str, int, float]:
    """
    Get a specific cell from a pandas DataFrame
    Args:
        data: The pandas DataFrame to get the data from.
        column: The column to get the data from. Must be a string or int. If int then it is the index of the column.
        row: The row to get the data from. Must be a string or int. If int then it is the index of the row.
    Returns:
        The data from the specified cell
    """
    # Translate positions into labels so a single label-based lookup works
    # for any mix of string and integer arguments.
    if isinstance(column, int):
        column = data.columns[column]
    if isinstance(row, int):
        row = data.index[row]
    return data.loc[row, column]
166
+
167
+ @tool
168
def get_dataframe_column(data: pd.DataFrame, column: Union[str, int])->pd.Series:
    """
    Get a specific column from a pandas DataFrame
    Args:
        data: The pandas DataFrame to get the column from.
        column: The column to get the data from. Must be a string or int. If int then it is the index of the column.
    Returns:
        The data from the specified column
    """
    # Integers select by position; anything else is treated as a column label
    if isinstance(column, int):
        return data.iloc[:, column]
    return data[column]
178
+
179
+ @tool
180
def get_dataframe_row(data: pd.DataFrame, row: Union[str, int])->pd.Series:
    """
    Get a specific row from a pandas DataFrame
    Args:
        data: The pandas DataFrame to get the row from.
        row: The row to get the data from. Must be a string or int. If int then it is the index of the row.
    Returns:
        The data from the specified row
    """
    # Integers select by position; anything else is treated as an index label
    if isinstance(row, int):
        return data.iloc[row, :]
    return data.loc[row, :]
190
+
191
+ @tool
192
def get_dataframe_groupby(data: pd.DataFrame, column: str, operation: Literal["mean", "sum", "count", "min", "max", "median", "std", "var"])->pd.DataFrame:
    """
    Group a pandas DataFrame by one column and aggregate each group
    Args:
        data: The pandas DataFrame to group.
        column: The column whose values define the groups.
        operation: The aggregation applied to every group. Must be one of the following:
            - "mean": Mean of each group.
            - "sum": Sum of each group.
            - "count": Number of rows in each group.
            - "min": Minimum of each group.
            - "max": Maximum of each group.
            - "median": Median of each group.
            - "std": Standard deviation of each group.
            - "var": Variance of each group.
    Returns:
        The aggregated data, indexed by the grouping column
    """
    supported = ("mean", "sum", "count", "min", "max", "median", "std", "var")
    if operation not in supported:
        raise ValueError("operation must be one of the following: mean, sum, count, min, max, median, std, var")
    # Each supported name is also the name of the groupby aggregation method
    aggregate = getattr(data.groupby(column), operation)
    return aggregate()
228
+
229
+
230
+
231
+
232
+
233
+
vlm_tools.py CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  from langchain_core.tools import tool
2
 
3
  @tool
@@ -9,32 +16,95 @@ def download_image(image_url: str):
9
  Returns:
10
  The image as a base64 string
11
  """
12
- #download the image into a base64 string
13
- image = None
14
  return image
15
 
16
  @tool
17
- def image_processing(image: str):
18
  """
19
  Process an image
20
  Args:
21
  image: The image in base64 format to process
 
 
22
  Returns:
23
  The processed image
24
  """
25
- processed_image = None
 
 
 
 
 
 
 
 
26
  return processed_image
27
 
 
 
 
 
28
  @tool
29
- def object_detection(image: str):
30
  """
31
  Detect objects in an image
32
  Args:
33
  image: The image in base64 format to detect objects in
 
 
 
34
  Returns:
35
  The detected objects
36
  """
37
- detected_objects = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  return detected_objects
39
 
40
  @tool
@@ -46,7 +116,9 @@ def ocr_scan(image: str):
46
  Returns:
47
  The text in the image
48
  """
49
- scanned_text = None
 
 
50
  return scanned_text
51
 
52
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import pytesseract
4
+ import requests
5
+ import base64
6
+ from io import BytesIO
7
+ from PIL import Image
8
  from langchain_core.tools import tool
9
 
10
  @tool
 
16
  Returns:
17
  The image as a base64 string
18
  """
19
+ response = requests.get(image_url)
20
+ image = base64.b64encode(response.content).decode('utf-8')
21
  return image
22
 
23
  @tool
24
def image_processing(image: str, brightness: float = 1.0, contrast: float = 1.0) -> str:
    """
    Process an image
    Args:
        image: The image in base64 format to process
        brightness: Offset added to every pixel value (passed as `beta` to
            cv2.convertScaleAbs)
        contrast: Gain every pixel value is multiplied by (passed as `alpha`
            to cv2.convertScaleAbs)
    Returns:
        The processed image, JPEG-encoded, as a base64 string
    Raises:
        ValueError: If the input cannot be decoded as an image or the result
            cannot be encoded as JPEG
    """
    image_data = base64.b64decode(image)
    np_image = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(np_image, cv2.IMREAD_COLOR)
    # imdecode signals failure by returning None rather than raising
    if img is None:
        raise ValueError("image could not be decoded; expected a base64-encoded image file")

    # Adjust brightness and contrast
    img = cv2.convertScaleAbs(img, alpha=contrast, beta=brightness)

    encoded_ok, buffer = cv2.imencode('.jpg', img)
    if not encoded_ok:
        raise ValueError("processed image could not be encoded as JPEG")
    processed_image = base64.b64encode(buffer).decode('utf-8')
    return processed_image
44
 
45
+ weights_path = "vlm_assets/yolo11n.weights"
46
+ config_path = "vlm_assets/yolo11n.cfg"
47
+ names_path = "vlm_assets/obj.names"
48
+
49
  @tool
50
def object_detection(image: str, weights_path: str = weights_path, config_path: str = config_path, names_path: str = names_path):
    """
    Detect objects in an image
    Args:
        image: The image in base64 format to detect objects in
        weights_path: The path to the weights file
        config_path: The path to the config file
        names_path: The path to the names file
    Returns:
        The detected objects, as a list of (label, confidence, [x, y, w, h]) tuples
    Raises:
        ValueError: If the input cannot be decoded as an image
    """
    image_data = base64.b64decode(image)
    np_image = np.frombuffer(image_data, np.uint8)
    img = cv2.imdecode(np_image, cv2.IMREAD_COLOR)
    # imdecode signals failure by returning None rather than raising
    if img is None:
        raise ValueError("image could not be decoded; expected a base64-encoded image file")
    height, width = img.shape[:2]

    # Load YOLO
    net = cv2.dnn.readNet(weights_path, config_path)
    layer_names = net.getLayerNames()
    # Depending on the OpenCV version the layer ids come back either as a flat
    # array or as an Nx1 array; flattening handles both (ids are 1-based).
    unconnected = np.array(net.getUnconnectedOutLayers()).flatten()
    output_layers = [layer_names[int(layer_id) - 1] for layer_id in unconnected]

    # Load class labels
    with open(names_path, 'r') as f:
        classes = [line.strip() for line in f.readlines()]

    # Detect objects
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Process detections
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            # Entries 0-3 are read as the box and entries 5+ as class scores
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]
            if confidence > 0.5:
                # Box values are scaled by the image size to get pixels
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Apply non-max suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    detected_objects = []
    # Same version difference as above: indices may be flat, Nx1, or an empty
    # tuple when nothing was kept.
    for kept in np.array(indices).flatten():
        kept = int(kept)
        label = str(classes[class_ids[kept]])
        detected_objects.append((label, confidences[kept], boxes[kept]))

    return detected_objects
109
 
110
  @tool
 
116
  Returns:
117
  The text in the image
118
  """
119
+ image_data = base64.b64decode(image)
120
+ img = Image.open(BytesIO(image_data))
121
+ scanned_text = pytesseract.image_to_string(img)
122
  return scanned_text
123
 
124