File size: 2,870 Bytes
d558c26
 
 
 
8ef9310
 
d558c26
 
8ef9310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d558c26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ef9310
d558c26
8ef9310
d558c26
 
 
 
 
 
 
 
 
 
8ef9310
 
 
 
d558c26
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import speech_recognition as sr
import difflib
import gradio as gr

# Step 1: Transcribe the audio file
def transcribe_audio(audio):
    """Transcribe a recorded audio file with the Google Web Speech API.

    Args:
        audio: Either a filesystem path (``str``) to the audio file, or an
            object that exposes the path as ``.name`` (for example a
            temporary-file wrapper). ``None`` means nothing was recorded.

    Returns:
        The recognized text on success. Failures are reported in-band as a
        human-readable message string: when no audio was supplied, when the
        speech could not be understood, or when the recognition request
        failed.
    """
    # Guard against a missing recording up front; without this the
    # `audio.name` lookup below fails with an AttributeError. The message is
    # returned in the same in-band style as the other failure cases.
    if audio is None:
        return "No audio was provided"

    # Accept a plain path as well as a file-like object carrying `.name`.
    audio_path = audio if isinstance(audio, str) else audio.name

    recognizer = sr.Recognizer()

    # AudioFile is a context manager; read the whole clip into memory.
    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)

    # Keep the try body limited to the call that can actually raise.
    try:
        print("Transcribing the audio...")
        transcription = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand the audio"
    except sr.RequestError as e:
        return f"Error with Google Speech Recognition service: {e}"

    print("Transcription completed.")
    return transcription

# Step 2: Compare the transcribed text with the input paragraph
def compare_texts(reference_text, transcribed_text):
    """Score how closely a transcription matches a reference paragraph.

    Two measures are produced:

    * An overall similarity percentage from a character-level
      ``difflib.SequenceMatcher`` ratio, compared case-insensitively so it
      agrees with the case-insensitive per-word comparison.
    * A per-word score for every word of the reference text, based on a
      word-level alignment of the two texts: ``100`` when the word was
      matched, ``50`` when a different word was spoken in its place, and
      ``0`` when nothing was spoken for it.

    Args:
        reference_text: The paragraph the speaker was supposed to read.
        transcribed_text: The text produced by speech recognition.

    Returns:
        A dict with the keys ``quota_remaining``,
        ``reference_text_from_application``, ``status``, ``text_score`` and
        ``version``. ``text_score`` holds ``fidelity_class`` (``"CORRECT"``
        when the similarity is above 50, otherwise ``"INCORRECT"``),
        ``quality_score``, ``text``, ``transcribedText`` and
        ``word_score_list``.
    """
    reference_words = reference_text.split()
    transcribed_words = transcribed_text.split()

    # autojunk=False: the default heuristic treats frequently occurring
    # characters as junk once a sequence reaches 200 items, which distorts
    # the ratio for paragraph-length text.
    char_matcher = difflib.SequenceMatcher(
        None,
        reference_text.lower(),
        transcribed_text.lower(),
        autojunk=False,
    )
    similarity_score = round(char_matcher.ratio() * 100, 2)

    # Align the texts word by word instead of comparing by position, so a
    # single skipped or inserted word does not mark every later word wrong.
    word_matcher = difflib.SequenceMatcher(
        None,
        [word.lower() for word in reference_words],
        [word.lower() for word in transcribed_words],
        autojunk=False,
    )

    word_scores = []
    for tag, ref_start, ref_end, hyp_start, hyp_end in word_matcher.get_opcodes():
        if tag == "insert":
            # Extra spoken words have no reference word to attach a score to.
            continue

        span = ref_end - ref_start
        if tag == "equal":
            scores = [100] * span
        elif tag == "replace":
            # A different word was spoken for as many reference words as
            # there are spoken words in this stretch; the rest are missing.
            spoken = hyp_end - hyp_start
            scores = [50 if offset < spoken else 0 for offset in range(span)]
        else:  # "delete": reference words with nothing spoken for them
            scores = [0] * span

        word_scores.extend(
            {"word": word, "quality_score": score}
            for word, score in zip(reference_words[ref_start:ref_end], scores)
        )

    fidelity_class = "CORRECT" if similarity_score > 50 else "INCORRECT"

    return {
        "quota_remaining": -1,
        "reference_text_from_application": reference_text,
        "status": "success",
        "text_score": {
            "fidelity_class": fidelity_class,
            "quality_score": similarity_score,
            "text": reference_text,
            "transcribedText": transcribed_text,
            "word_score_list": word_scores,
        },
        "version": "1.1",
    }

# Gradio Interface Function
def gradio_function(paragraph, audio):
    """Transcribe the recording and score it against the given paragraph.

    Args:
        paragraph: The reference text entered by the user.
        audio: The recorded audio, passed through to ``transcribe_audio``.

    Returns:
        The result of ``compare_texts`` for the paragraph and the
        transcription of the audio.
    """
    return compare_texts(paragraph, transcribe_audio(audio))

# Gradio Interface
# Input widgets, listed in the order gradio_function takes its arguments.
# NOTE(review): the `gr.inputs` namespace and the `source=` / `type="file"`
# arguments look tied to an older Gradio release; confirm against the
# installed version before upgrading.
_paragraph_box = gr.inputs.Textbox(lines=5, label="Input Paragraph")
_microphone_clip = gr.inputs.Audio(source="microphone", type="file", label="Record Audio")

interface = gr.Interface(
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
    fn=gradio_function,
    inputs=[_paragraph_box, _microphone_clip],
    outputs="json",
)

# Start serving the app as soon as this module is executed.
interface.launch()