CoralLeiCN committed on
Commit
2198402
·
1 Parent(s): 8994bf1

Update TranscribeAudioBytes tool: change the model to "gemini-2.5-pro" and add a content generation configuration for more deterministic results.

Browse files
Files changed (1) hide show
  1. agent/tools.py +9 -1
agent/tools.py CHANGED
@@ -172,8 +172,15 @@ class TranscribeAudioBytes(Tool):
172
  def forward(self, audio_bytes: str, question: str = ""):
173
  client = genai.Client()
174
  prompt = "Transcribe the audio from this byte array, giving timestamps for salient events in the audio. Also provide visual descriptions."
 
 
 
 
 
 
 
175
  response = client.models.generate_content(
176
- model="gemini-2.5-flash-preview-05-20",
177
  contents=[
178
  f"{prompt} And also try to answer the question: {question}",
179
  types.Part.from_bytes(
@@ -181,6 +188,7 @@ class TranscribeAudioBytes(Tool):
181
  mime_type="audio/mp3",
182
  ),
183
  ],
 
184
  )
185
  transcript = response.text
186
 
 
172
  def forward(self, audio_bytes: str, question: str = ""):
173
  client = genai.Client()
174
  prompt = "Transcribe the audio from this byte array, giving timestamps for salient events in the audio. Also provide visual descriptions."
175
+ config = types.GenerateContentConfig(
176
+ temperature=0,
177
+ candidate_count=1,
178
+ response_mime_type="application/json",
179
+ top_p=0.95,
180
+ seed=42,
181
+ )
182
  response = client.models.generate_content(
183
+ model="gemini-2.5-pro",
184
  contents=[
185
  f"{prompt} And also try to answer the question: {question}",
186
  types.Part.from_bytes(
 
188
  mime_type="audio/mp3",
189
  ),
190
  ],
191
+ config=config,
192
  )
193
  transcript = response.text
194