File size: 4,076 Bytes
d558c26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import speech_recognition as sr
import difflib
import wave
import pyaudio
import gradio as gr

# Step 1: Record audio
def record_audio(filename, seconds=10):
    """Record mono 16-bit audio at 44.1 kHz and save it as a WAV file.

    Args:
        filename: Path of the WAV file to write.
        seconds: Length of the recording in seconds. Defaults to 10, the
            duration that used to be hard-coded.

    The PortAudio stream, the PyAudio instance and the WAV writer are all
    released even if reading from the device or writing the file fails.
    """
    chunk = 1024  # Record in chunks of 1024 samples
    sample_format = pyaudio.paInt16  # 16 bits per sample
    channels = 1
    fs = 44100  # Record at 44100 samples per second

    p = pyaudio.PyAudio()  # Create an interface to PortAudio
    try:
        # Look up the sample width before the interface is terminated.
        sample_width = p.get_sample_size(sample_format)

        print("Recording...")
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        frames_per_buffer=chunk,
                        input=True)
        try:
            # Store data in chunks for the specified duration
            frames = [stream.read(chunk)
                      for _ in range(int(fs / chunk * seconds))]
        finally:
            # Stop and close the stream, even when a read fails
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Save the recorded audio as a WAV file; the context manager closes it
    # on both success and failure.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b''.join(frames))

    print("Recording completed.")

# Step 2: Transcribe the audio file
def transcribe_audio(filename):
    """Transcribe the audio file at *filename* and return the recognised text.

    The file is read in full and sent to ``recognize_google``. When the
    audio cannot be understood, or the recognition request fails, a message
    is printed and an empty string is returned instead of raising.
    """
    engine = sr.Recognizer()

    # Load the whole file into an audio object before recognising it
    with sr.AudioFile(filename) as audio_source:
        captured = engine.record(audio_source)
        try:
            print("Transcribing the audio...")
            text = engine.recognize_google(captured)
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand the audio")
            return ""
        except sr.RequestError as e:
            print(f"Error with Google Speech Recognition service: {e}")
            return ""
        else:
            print("Transcription completed.")
            return text

# Step 3: Compare the transcribed text with the input paragraph
def compare_texts(reference_text, transcribed_text):
    """Score a transcription against the reference paragraph.

    Both texts are split on whitespace. Each word is normalised by
    stripping leading/trailing punctuation and case-folding, so that
    capitalisation, punctuation and line breaks in the reference do not
    count as errors. The same normalised form drives both the per-word
    scores and the overall similarity, keeping the two consistent.

    Args:
        reference_text: The paragraph the speaker was asked to read.
        transcribed_text: The text produced by the recogniser.

    Returns:
        A dict with the fixed fields ``quota_remaining`` (-1), ``status``
        ("success") and ``version`` ("1.1"), the original reference text,
        and a ``text_score`` dict containing:

        * ``quality_score``: ``difflib`` similarity ratio of the normalised
          texts as a percentage rounded to two decimals;
        * ``fidelity_class``: "CORRECT" when that score is above 50,
          otherwise "INCORRECT";
        * ``word_score_list``: one entry per reference word, compared with
          the transcribed word at the same position -- 100 on a match,
          50 on a mismatch, 0 when the transcription has no word there.
    """
    import string  # function-scope import keeps this block self-contained

    def _normalize(word):
        # Compare words without surrounding punctuation or case differences.
        return word.strip(string.punctuation).casefold()

    reference_words = reference_text.split()
    reference_keys = [_normalize(word) for word in reference_words]
    transcribed_keys = [_normalize(word) for word in transcribed_text.split()]

    sm = difflib.SequenceMatcher(
        None, " ".join(reference_keys), " ".join(transcribed_keys)
    )
    similarity_score = round(sm.ratio() * 100, 2)

    word_scores = []
    for i, word in enumerate(reference_words):
        if i >= len(transcribed_keys):
            score = 0  # nothing was transcribed at this position
        elif reference_keys[i] == transcribed_keys[i]:
            score = 100
        else:
            score = 50  # a word is present at this position but differs
        # Report the word exactly as it appears in the reference text.
        word_scores.append({"word": word, "quality_score": score})

    fidelity_class = "CORRECT" if similarity_score > 50 else "INCORRECT"

    return {
        "quota_remaining": -1,
        "reference_text_from_application": reference_text,
        "status": "success",
        "text_score": {
            "fidelity_class": fidelity_class,
            "quality_score": similarity_score,
            "text": reference_text,
            "transcribedText": transcribed_text,
            "word_score_list": word_scores
        },
        "version": "1.1"
    }

# Gradio Interface Function
def gradio_function(paragraph):
    """Record speech, transcribe it, and score it against *paragraph*.

    The recording is written to 'recorded_audio.wav', that file is then
    transcribed, and the comparison result for the paragraph and the
    transcription is returned.
    """
    wav_path = "recorded_audio.wav"

    # Capture the audio, then feed the transcription into the comparison
    record_audio(wav_path)
    spoken_text = transcribe_audio(wav_path)
    return compare_texts(paragraph, spoken_text)

# Gradio Interface
# Use the top-level `gr.Textbox` component: the legacy `gr.inputs` namespace
# was deprecated in Gradio 3.x and is no longer available in Gradio 4.x.
interface = gr.Interface(
    fn=gradio_function,
    inputs=gr.Textbox(lines=5, label="Input Paragraph"),
    outputs="json",
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text."
)

# Launch Gradio app
interface.launch()