File size: 2,746 Bytes
d5685b0
 
 
5f52293
d5685b0
 
69f2e98
b1f3cf3
d5685b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import sounddevice as sd
import scipy.io.wavfile as wavfile
import numpy as np
import gradio as gr
from groq import Groq
import tempfile
import os

class Recorder:
    """Toggle-style audio recorder backed by a ``sounddevice`` input stream.

    Audio blocks delivered to :meth:`callback` are accumulated in
    ``self.frames`` while ``self.recording`` is true, and can be written to
    a temporary WAV file with :meth:`save_audio`.

    Args:
        sample_rate: Sampling rate passed to the input stream and written
            into the WAV header.
        channels: Number of input channels requested from the device.
            Defaults to 2, the value that was previously hard-coded.
    """

    def __init__(self, sample_rate=44100, channels=2):
        self.recording = False
        self.frames = []
        self.sample_rate = sample_rate
        self.channels = channels
        self.stream = None

    def toggle_recording(self):
        """Start recording if idle, otherwise stop the active recording.

        Returns:
            The status text for the new state.

        Raises:
            Whatever the underlying stream raises when it cannot be opened,
            started or stopped; the recorder is left in the idle state.
        """
        if not self.recording:
            self.frames = []
            stream = sd.InputStream(
                callback=self.callback,
                channels=self.channels,
                samplerate=self.sample_rate,
            )
            # Raise the flag before starting so blocks delivered right after
            # start() are not discarded by callback().
            self.recording = True
            try:
                stream.start()
            except Exception:
                # Do not stay "recording" with a stream that never started.
                self.recording = False
                stream.close()
                raise
            self.stream = stream
            return "Recording... Press to Stop"

        stream, self.stream = self.stream, None
        try:
            if stream is not None:
                try:
                    stream.stop()
                finally:
                    # Release the device even if stop() failed.
                    stream.close()
        finally:
            # The flag is lowered only after the stream is stopped so blocks
            # delivered during stop() are still kept.
            self.recording = False
        return "Recording stopped. Press to Record"

    def callback(self, indata, frames, time, status):
        """Stream callback: store a copy of ``indata`` while recording.

        The signature matches the ``callback=`` argument given to
        ``sd.InputStream``; ``frames``, ``time`` and ``status`` are unused.
        """
        if self.recording:
            # Copy so the stored block does not alias the caller's buffer.
            self.frames.append(indata.copy())

    def save_audio(self):
        """Write the captured frames to a temporary ``.wav`` file.

        Returns:
            The path of the written file, or ``None`` when no frames were
            captured. The caller is responsible for deleting the file.
        """
        if not self.frames:
            return None

        audio_data = np.concatenate(self.frames, axis=0)
        # Only reserve a unique name here; the handle is closed before the
        # file is written by path, because re-opening a still-open temporary
        # file by name fails on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
            wav_path = temp_wav_file.name
        try:
            wavfile.write(wav_path, self.sample_rate, audio_data)
        except Exception:
            # delete=False means nothing else will clean this up.
            os.remove(wav_path)
            raise
        return wav_path

# Single module-level recorder shared by the record() and transcribe() handlers.
recorder = Recorder()

def record():
    """Toggle the shared recorder and return its new status text."""
    status_text = recorder.toggle_recording()
    return status_text

def transcribe():
    """Save the recorded audio and transcribe it with Groq's Whisper model.

    The API key is read from the ``GROQ_API_KEY`` environment variable
    instead of being embedded in the source.

    Returns:
        The transcribed text, or a short message when nothing was recorded,
        the API key is missing, or the response carries no ``text``.
    """
    audio_file = recorder.save_audio()
    if not audio_file:
        return "No audio recorded."

    try:
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            return "GROQ_API_KEY environment variable is not set."

        client = Groq(api_key=api_key)
        with open(audio_file, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(audio_file, file.read()),
                model="whisper-large-v3",
                # NOTE(review): this looks like placeholder text, yet it is
                # sent to the model as context; confirm it is intended.
                prompt="Specify context or spelling",  # Optional
                response_format="json",  # Optional
                language="en",  # Optional
                temperature=0.0  # Optional
            )
    finally:
        # Remove the temporary file even when the request fails.
        os.remove(audio_file)

    if hasattr(transcription, 'text'):
        return transcription.text
    return "Transcription text not found."

def _transcribe_when_stopped():
    """Transcribe only after a click that stopped a recording."""
    if recorder.recording:
        # This click started a recording, so there is nothing to transcribe
        # yet; leave the textbox as it is.
        return gr.update()
    return transcribe()


with gr.Blocks() as gradio_interface:
    with gr.Column():
        record_button = gr.Button("Press to Record")
        transcription_output = gr.Textbox(label="Transcription")
        # Chain the two steps rather than registering two independent click
        # listeners, so transcription always runs after the recorder has
        # been toggled.
        record_button.click(fn=record, outputs=record_button).then(
            fn=_transcribe_when_stopped, outputs=transcription_output
        )

if __name__ == "__main__":
    gradio_interface.launch()