# Page-scrape residue kept for provenance (not part of agent.py):
#   keenthinker's picture / Update agent.py / 966e533 verified
from smolagents import Tool, tool, CodeAgent, OpenAIServerModel, DuckDuckGoSearchTool
import time
import os
import requests
import markdownify
#for the mp3 file reading
import whisper
import tempfile
import io
class Mod4Agent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    Wires a ``gpt-4o`` ``OpenAIServerModel`` to a ``CodeAgent`` equipped with
    a DuckDuckGo search tool and a Whisper-based mp3 transcription tool.
    Instances are callable: ``agent(question)`` prefixes the question with
    ``base_prompt``, runs the agent through :meth:`retry`, and returns
    whatever the agent produced.
    """

    def __init__(self):
        """Build the model, the tool list and the underlying ``CodeAgent``.

        The API key is read from the ``OPENAI_KEY`` environment variable and
        passed to the model as-is (``None`` when the variable is unset).
        """
        self.api_key = os.getenv("OPENAI_KEY")

        # Base model; temperature 0.0 is passed through to the model wrapper.
        self.model = OpenAIServerModel(
            model_id="gpt-4o",
            api_base="https://api.openai.com/v1",
            temperature=0.0,
            api_key=self.api_key,
        )

        # Base prompt prepended to every question. Built from adjacent
        # literals so the runtime text does not depend on source indentation.
        self.base_prompt = (
            "\n"
            "You are an agent with a set of tools for answering to questions.\n"
            "You need to be accurate and get the best possible answer in the simplest possible way.\n"
            "You need to think step-by-step, and if at some point there is an error, backtrack and use a different method.\n"
            "It is important to adhere to the instructions of the question as close as possible.\n"
            "IMPORTANT: always answer according to the format required to the best of your abilities. Stating that you do not know, or explaining why, will give a score of 0 therefore it is to be avoided.\n"
            "You can do it!\n"
            "Question:\n"
        )

        # NOTE: the parameter is named ``input`` (shadowing the builtin) and
        # annotated ``bytes`` because both are part of the tool's interface;
        # the body additionally accepts a ``str`` path.
        # NOTE(review): the docstring below is consumed by ``@tool`` — keep
        # the ``name: description`` form under ``Args:``.
        @tool
        def audio_interpreter(input: bytes) -> str:
            """
            Function to transcribe an mp3 file from raw bytes or file path into the corresponding text

            Args:
                input: raw bytes content of the input mp3 file, or its file path

            Returns:
                str: a string with the text corresponding to the mp3 input file
            """
            # The "tiny" Whisper model is loaded on every call.
            model = whisper.load_model("tiny")
            if isinstance(input, bytes):
                # transcribe() is given a file name, so the bytes are written
                # to a temporary .mp3 first; flush() makes sure they are on
                # disk before the file is read back by name.
                with tempfile.NamedTemporaryFile(suffix=".mp3", delete=True) as tmp:
                    tmp.write(input)
                    tmp.flush()
                    result = model.transcribe(tmp.name)
            elif isinstance(input, str) and os.path.exists(input):
                # Safe if the HF environment mounts the file
                result = model.transcribe(input)
            else:
                raise TypeError("Unsupported input type. Expected bytes or a valid file path.")
            return result["text"]

        self.list_tools = [DuckDuckGoSearchTool(), audio_interpreter]
        self.agent = CodeAgent(
            tools=self.list_tools,
            model=self.model,
            additional_authorized_imports=['pandas', 'io', 'requests', 'markdownify'],
            max_steps=10,
            add_base_tools=True,  # Add any additional base tools
            # Optional, currently disabled:
            #   planning_interval=3  -> enable planning every 3 steps
        )
        print("BasicAgent initialized.")

    # Retry policy if quota exceeded
    def retry(self, prompt, *, max_retries=None):
        """Run the agent on ``prompt``, backing off while rate-limited.

        An exception whose text contains ``"429"`` is treated as a rate
        limit: the call sleeps (20 s, doubling each time, capped at 80 s)
        and tries again. Any other exception is printed and ends the call.

        Args:
            prompt: Full prompt passed to ``self.agent.run``.
            max_retries: Maximum number of rate-limit retries before giving
                up. ``None`` (the default) retries without limit, which is
                the historical behaviour; ``0`` never sleeps.

        Returns:
            The value returned by ``self.agent.run``, or ``None`` when a
            non-rate-limit error occurred or the retry budget ran out.
        """
        backoff = 20
        retries_done = 0
        while True:
            try:
                return self.agent.run(prompt)
            except Exception as e:
                if "429" not in str(e):
                    print("Error:", e)
                    return None
                if max_retries is not None and retries_done >= max_retries:
                    print(f"Rate limit hit. Giving up after {retries_done} retries.")
                    return None
                retries_done += 1
                print(f"Rate limit hit. Sleeping for {backoff} seconds...")
                time.sleep(backoff)
                backoff = min(backoff * 2, 80)  # max backoff = 80 seconds

    def __call__(self, question: str) -> str:
        """Answer ``question`` by running the agent on the prefixed prompt.

        Args:
            question: The question text; it is appended to ``base_prompt``.

        Returns:
            The result of :meth:`retry`; this is ``None`` when the run
            failed, despite the ``str`` annotation.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        prompt = f'{self.base_prompt}\n {question}'
        answer = self.retry(prompt)
        print(f"Agent returning fixed answer: {answer}")
        return answer