# Hugging Face Space: VanguardAI / MultiModal_OpenSource_AI (status: Sleeping)
# File size: 2,746 Bytes
# Commits touching this file: d5685b0, 5f52293, 69f2e98, b1f3cf3

import sounddevice as sd
import scipy.io.wavfile as wavfile
import numpy as np
import gradio as gr
from groq import Groq
import tempfile
import os

class Recorder:
    """Toggle-style microphone recorder built on a ``sounddevice`` input stream.

    Audio blocks handed to :meth:`callback` while recording is active are
    accumulated in ``self.frames``; :meth:`save_audio` concatenates them and
    writes them to a temporary WAV file.
    """

    def __init__(self, sample_rate=44100, channels=2):
        """Create an idle recorder.

        Args:
            sample_rate: Sample rate passed to the input stream and written
                to the WAV file.
            channels: Number of input channels requested from the stream.
        """
        self.recording = False
        self.frames = []
        self.sample_rate = sample_rate
        self.channels = channels
        self.stream = None

    def toggle_recording(self):
        """Start recording if idle, otherwise stop; return the new button label.

        Starting discards any previously captured frames.
        """
        if not self.recording:
            self.frames = []
            stream = sd.InputStream(
                callback=self.callback,
                channels=self.channels,
                samplerate=self.sample_rate,
            )
            try:
                stream.start()
            except Exception:
                # Do not keep a half-opened stream around if it failed to start.
                stream.close()
                raise
            self.stream = stream
            self.recording = True
            return "Recording... Press to Stop"

        # Reset the state first so the recorder is usable again even if
        # stopping the stream raises, and so late callbacks are ignored.
        self.recording = False
        stream, self.stream = self.stream, None
        if stream is not None:
            try:
                stream.stop()
            finally:
                stream.close()
        return "Recording stopped. Press to Record"

    def callback(self, indata, frames, time, status):
        """Stream callback: keep a copy of each incoming block while recording.

        ``frames``, ``time`` and ``status`` are accepted to match the callback
        signature but are not used.
        """
        if self.recording:
            # Copy because the stream owns (and may reuse) the ``indata`` buffer.
            self.frames.append(indata.copy())

    def save_audio(self):
        """Write the captured frames to a temporary ``.wav`` file.

        Returns:
            The path of the written file, or ``None`` when no frames were
            captured. The caller is responsible for deleting the file.
        """
        # Snapshot so a callback appending concurrently cannot change the list
        # while it is being concatenated.
        captured = list(self.frames)
        if not captured:
            return None

        audio_data = np.concatenate(captured, axis=0)

        # Only reserve the name here; the handle is closed before writing
        # because an open NamedTemporaryFile cannot be reopened by name on
        # every platform (notably Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
            wav_path = temp_wav_file.name
        try:
            wavfile.write(wav_path, self.sample_rate, audio_data)
        except Exception:
            # Do not leave an empty/partial temp file behind on failure.
            os.remove(wav_path)
            raise
        return wav_path

# Single module-level recorder shared by the record() and transcribe() handlers.
recorder = Recorder()

def record():
    """Toggle the shared recorder and return the label to show on the button."""
    button_label = recorder.toggle_recording()
    return button_label

def transcribe():
    """Transcribe the most recently captured audio with Groq's Whisper model.

    The captured frames are written to a temporary WAV file, uploaded to the
    Groq transcription endpoint, and the temporary file is removed afterwards
    whether or not the request succeeded.

    Returns:
        The transcription text, or a short status message when there is no
        audio, no API key, or no text in the response.
    """
    audio_file = recorder.save_audio()
    if not audio_file:
        return "No audio recorded."

    try:
        # The API key must come from the environment, never from source
        # control. NOTE(review): the key that was previously hard-coded here
        # has been exposed and should be revoked.
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            return "GROQ_API_KEY environment variable is not set."

        client = Groq(api_key=api_key)
        with open(audio_file, "rb") as file:
            transcription = client.audio.transcriptions.create(
                # Send only the file name, not the local temp directory path.
                file=(os.path.basename(audio_file), file.read()),
                model="whisper-large-v3",
                prompt="Specify context or spelling",  # Optional
                response_format="json",  # Optional
                language="en",  # Optional
                temperature=0.0,  # Optional
            )
    finally:
        # Clean up the temporary file even if the request raised.
        os.remove(audio_file)

    return getattr(transcription, "text", "Transcription text not found.")

def _toggle_and_transcribe():
    """Handle one button click: toggle recording, transcribe only after a stop.

    Returns:
        A ``(button_label, transcription_text)`` pair. The text is cleared
        when the click started a new recording.
    """
    label = record()
    if recorder.recording:
        # This click started a recording; there is nothing to transcribe yet.
        return label, ""
    return label, transcribe()


with gr.Blocks() as gradio_interface:
    with gr.Column():
        record_button = gr.Button("Press to Record")
        transcription_output = gr.Textbox(label="Transcription")
        # A single listener keeps "toggle" and "transcribe" strictly ordered;
        # two separate click listeners on the same button are not guaranteed
        # to run in registration order.
        record_button.click(
            fn=_toggle_and_transcribe,
            outputs=[record_button, transcription_output],
        )

# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    gradio_interface.launch()