"""Record audio from the default input device and transcribe it with Groq."""

import os
import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile
import sounddevice as sd
from groq import Groq


class Recorder:
    """Toggle-style audio recorder built on ``sounddevice.InputStream``.

    Captured chunks are accumulated in ``self.frames`` by the stream
    callback and can be dumped to a temporary WAV file with
    :meth:`save_audio`.
    """

    def __init__(self, sample_rate=44100):
        """Create an idle recorder.

        Args:
            sample_rate: Sampling rate in Hz passed to the input stream and
                written into the WAV header.
        """
        self.recording = False
        self.frames = []
        self.sample_rate = sample_rate
        self.stream = None

    def toggle_recording(self) -> str:
        """Start recording if idle, otherwise stop the active stream.

        Returns:
            The label to display on the toggle button after the state change.
        """
        if not self.recording:
            # Starting a new take discards whatever was captured previously.
            self.frames = []
            self.stream = sd.InputStream(
                callback=self.callback,
                channels=2,
                samplerate=self.sample_rate,
            )
            self.stream.start()
            self.recording = True
            return "Recording... Press to Stop"

        self.stream.stop()
        self.stream.close()
        self.recording = False
        return "Recording stopped. Press to Record"

    def callback(self, indata, frames, time, status) -> None:
        """Stream callback: keep a copy of each incoming chunk while recording.

        The signature is the one ``sounddevice.InputStream`` calls with;
        ``frames``, ``time`` and ``status`` are not used here.
        """
        if self.recording:
            # Copy the chunk so the stored data does not alias the buffer
            # handed to the callback.
            self.frames.append(indata.copy())

    def save_audio(self):
        """Write the captured frames to a temporary ``.wav`` file.

        Returns:
            The path of the written file, or ``None`` when nothing has been
            captured. The caller is responsible for deleting the file.
        """
        if not self.frames:
            return None

        audio_data = np.concatenate(self.frames, axis=0)

        # Only reserve a unique path here and close the handle right away:
        # writing by name while the handle is still open is not portable.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
            wav_path = temp_wav_file.name

        try:
            wavfile.write(wav_path, self.sample_rate, audio_data)
        except Exception:
            # Do not leave a half-written temp file behind.
            os.remove(wav_path)
            raise
        return wav_path


recorder = Recorder()


def record() -> str:
    """Toggle the shared recorder and return the new button label."""
    return recorder.toggle_recording()


def transcribe() -> str:
    """Transcribe the audio captured by the shared recorder.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable;
    credentials must never be committed to source control (any key that was
    previously embedded in this file should be revoked).

    Returns:
        The transcription text, or a human-readable message when there is no
        audio, no API key, or no ``text`` attribute on the response.
    """
    audio_file = recorder.save_audio()
    if not audio_file:
        return "No audio recorded."

    try:
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            return "GROQ_API_KEY environment variable is not set."

        client = Groq(api_key=api_key)
        with open(audio_file, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(audio_file, file.read()),
                model="whisper-large-v3",
                prompt="Specify context or spelling",  # Optional
                response_format="json",  # Optional
                language="en",  # Optional
                temperature=0.0,  # Optional
            )
    finally:
        # Always clean up the temporary file, even if the request fails.
        os.remove(audio_file)

    # Inspect the transcription object to find the text
    print(transcription)

    # Fall back to a message when the response carries no ``text`` attribute.
    return getattr(transcription, "text", "Transcription text not found.")
# Build the UI: a single toggle button and a textbox for the transcription.
with gr.Blocks() as gradio_interface:
    with gr.Column():
        record_button = gr.Button("Press to Record")
        transcription_output = gr.Textbox(label="Transcription")

        # Chain the handlers instead of registering two independent click
        # listeners: separate listeners have no ordering guarantee, so
        # `transcribe` could read the frames before `record` has stopped the
        # stream. With `.then()` the toggle always completes first.
        record_button.click(fn=record, outputs=record_button).then(
            fn=transcribe, outputs=transcription_output
        )

if __name__ == "__main__":
    gradio_interface.launch()